LLVM  10.0.0svn
LegalizerHelper.cpp
Go to the documentation of this file.
1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
24 #include "llvm/Support/Debug.h"
27 
28 #define DEBUG_TYPE "legalizer"
29 
30 using namespace llvm;
31 using namespace LegalizeActions;
32 
33 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
34 ///
35 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
36 /// with any leftover piece as type \p LeftoverTy
37 ///
38 /// Returns -1 in the first element of the pair if the breakdown is not
39 /// satisfiable.
40 static std::pair<int, int>
41 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
42  assert(!LeftoverTy.isValid() && "this is an out argument");
43 
44  unsigned Size = OrigTy.getSizeInBits();
45  unsigned NarrowSize = NarrowTy.getSizeInBits();
46  unsigned NumParts = Size / NarrowSize;
47  unsigned LeftoverSize = Size - NumParts * NarrowSize;
48  assert(Size > NarrowSize);
49 
50  if (LeftoverSize == 0)
51  return {NumParts, 0};
52 
53  if (NarrowTy.isVector()) {
54  unsigned EltSize = OrigTy.getScalarSizeInBits();
55  if (LeftoverSize % EltSize != 0)
56  return {-1, -1};
57  LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
58  } else {
59  LeftoverTy = LLT::scalar(LeftoverSize);
60  }
61 
62  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
63  return std::make_pair(NumParts, NumLeftover);
64 }
65 
67  GISelChangeObserver &Observer,
68  MachineIRBuilder &Builder)
69  : MIRBuilder(Builder), MRI(MF.getRegInfo()),
70  LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
71  MIRBuilder.setMF(MF);
72  MIRBuilder.setChangeObserver(Observer);
73 }
74 
76  GISelChangeObserver &Observer,
78  : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
79  MIRBuilder.setMF(MF);
80  MIRBuilder.setChangeObserver(Observer);
81 }
84  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
85 
86  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
87  MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
88  return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? Legalized
90  auto Step = LI.getAction(MI, MRI);
91  switch (Step.Action) {
92  case Legal:
93  LLVM_DEBUG(dbgs() << ".. Already legal\n");
94  return AlreadyLegal;
95  case Libcall:
96  LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
97  return libcall(MI);
98  case NarrowScalar:
99  LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
100  return narrowScalar(MI, Step.TypeIdx, Step.NewType);
101  case WidenScalar:
102  LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
103  return widenScalar(MI, Step.TypeIdx, Step.NewType);
104  case Lower:
105  LLVM_DEBUG(dbgs() << ".. Lower\n");
106  return lower(MI, Step.TypeIdx, Step.NewType);
107  case FewerElements:
108  LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
109  return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
110  case MoreElements:
111  LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
112  return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
113  case Custom:
114  LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
115  return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
117  default:
118  LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
119  return UnableToLegalize;
120  }
121 }
122 
123 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
124  SmallVectorImpl<Register> &VRegs) {
125  for (int i = 0; i < NumParts; ++i)
127  MIRBuilder.buildUnmerge(VRegs, Reg);
128 }
129 
130 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
131  LLT MainTy, LLT &LeftoverTy,
133  SmallVectorImpl<Register> &LeftoverRegs) {
134  assert(!LeftoverTy.isValid() && "this is an out argument");
135 
136  unsigned RegSize = RegTy.getSizeInBits();
137  unsigned MainSize = MainTy.getSizeInBits();
138  unsigned NumParts = RegSize / MainSize;
139  unsigned LeftoverSize = RegSize - NumParts * MainSize;
140 
141  // Use an unmerge when possible.
142  if (LeftoverSize == 0) {
143  for (unsigned I = 0; I < NumParts; ++I)
144  VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
145  MIRBuilder.buildUnmerge(VRegs, Reg);
146  return true;
147  }
148 
149  if (MainTy.isVector()) {
150  unsigned EltSize = MainTy.getScalarSizeInBits();
151  if (LeftoverSize % EltSize != 0)
152  return false;
153  LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
154  } else {
155  LeftoverTy = LLT::scalar(LeftoverSize);
156  }
157 
158  // For irregular sizes, extract the individual parts.
159  for (unsigned I = 0; I != NumParts; ++I) {
160  Register NewReg = MRI.createGenericVirtualRegister(MainTy);
161  VRegs.push_back(NewReg);
162  MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
163  }
164 
165  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
166  Offset += LeftoverSize) {
167  Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
168  LeftoverRegs.push_back(NewReg);
169  MIRBuilder.buildExtract(NewReg, Reg, Offset);
170  }
171 
172  return true;
173 }
174 
175 static LLT getGCDType(LLT OrigTy, LLT TargetTy) {
176  if (OrigTy.isVector() && TargetTy.isVector()) {
177  assert(OrigTy.getElementType() == TargetTy.getElementType());
178  int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
179  TargetTy.getNumElements());
180  return LLT::scalarOrVector(GCD, OrigTy.getElementType());
181  }
182 
183  if (OrigTy.isVector() && !TargetTy.isVector()) {
184  assert(OrigTy.getElementType() == TargetTy);
185  return TargetTy;
186  }
187 
188  assert(!OrigTy.isVector() && !TargetTy.isVector());
189 
190  int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(),
191  TargetTy.getSizeInBits());
192  return LLT::scalar(GCD);
193 }
194 
195 void LegalizerHelper::insertParts(Register DstReg,
196  LLT ResultTy, LLT PartTy,
197  ArrayRef<Register> PartRegs,
198  LLT LeftoverTy,
199  ArrayRef<Register> LeftoverRegs) {
200  if (!LeftoverTy.isValid()) {
201  assert(LeftoverRegs.empty());
202 
203  if (!ResultTy.isVector()) {
204  MIRBuilder.buildMerge(DstReg, PartRegs);
205  return;
206  }
207 
208  if (PartTy.isVector())
209  MIRBuilder.buildConcatVectors(DstReg, PartRegs);
210  else
211  MIRBuilder.buildBuildVector(DstReg, PartRegs);
212  return;
213  }
214 
215  unsigned PartSize = PartTy.getSizeInBits();
216  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
217 
218  Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
219  MIRBuilder.buildUndef(CurResultReg);
220 
221  unsigned Offset = 0;
222  for (Register PartReg : PartRegs) {
223  Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
224  MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
225  CurResultReg = NewResultReg;
226  Offset += PartSize;
227  }
228 
229  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
230  // Use the original output register for the final insert to avoid a copy.
231  Register NewResultReg = (I + 1 == E) ?
232  DstReg : MRI.createGenericVirtualRegister(ResultTy);
233 
234  MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
235  CurResultReg = NewResultReg;
236  Offset += LeftoverPartSize;
237  }
238 }
239 
240 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
241  switch (Opcode) {
242  case TargetOpcode::G_SDIV:
243  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
244  switch (Size) {
245  case 32:
246  return RTLIB::SDIV_I32;
247  case 64:
248  return RTLIB::SDIV_I64;
249  case 128:
250  return RTLIB::SDIV_I128;
251  default:
252  llvm_unreachable("unexpected size");
253  }
254  case TargetOpcode::G_UDIV:
255  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
256  switch (Size) {
257  case 32:
258  return RTLIB::UDIV_I32;
259  case 64:
260  return RTLIB::UDIV_I64;
261  case 128:
262  return RTLIB::UDIV_I128;
263  default:
264  llvm_unreachable("unexpected size");
265  }
266  case TargetOpcode::G_SREM:
267  assert((Size == 32 || Size == 64) && "Unsupported size");
268  return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
269  case TargetOpcode::G_UREM:
270  assert((Size == 32 || Size == 64) && "Unsupported size");
271  return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
272  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
273  assert(Size == 32 && "Unsupported size");
274  return RTLIB::CTLZ_I32;
275  case TargetOpcode::G_FADD:
276  assert((Size == 32 || Size == 64) && "Unsupported size");
277  return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
278  case TargetOpcode::G_FSUB:
279  assert((Size == 32 || Size == 64) && "Unsupported size");
280  return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
281  case TargetOpcode::G_FMUL:
282  assert((Size == 32 || Size == 64) && "Unsupported size");
283  return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
284  case TargetOpcode::G_FDIV:
285  assert((Size == 32 || Size == 64) && "Unsupported size");
286  return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
287  case TargetOpcode::G_FEXP:
288  assert((Size == 32 || Size == 64) && "Unsupported size");
289  return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
290  case TargetOpcode::G_FEXP2:
291  assert((Size == 32 || Size == 64) && "Unsupported size");
292  return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32;
293  case TargetOpcode::G_FREM:
294  return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
295  case TargetOpcode::G_FPOW:
296  return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
297  case TargetOpcode::G_FMA:
298  assert((Size == 32 || Size == 64) && "Unsupported size");
299  return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
300  case TargetOpcode::G_FSIN:
301  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
302  return Size == 128 ? RTLIB::SIN_F128
303  : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
304  case TargetOpcode::G_FCOS:
305  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
306  return Size == 128 ? RTLIB::COS_F128
307  : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
308  case TargetOpcode::G_FLOG10:
309  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
310  return Size == 128 ? RTLIB::LOG10_F128
311  : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
312  case TargetOpcode::G_FLOG:
313  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
314  return Size == 128 ? RTLIB::LOG_F128
315  : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
316  case TargetOpcode::G_FLOG2:
317  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
318  return Size == 128 ? RTLIB::LOG2_F128
319  : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
320  case TargetOpcode::G_FCEIL:
321  assert((Size == 32 || Size == 64) && "Unsupported size");
322  return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32;
323  case TargetOpcode::G_FFLOOR:
324  assert((Size == 32 || Size == 64) && "Unsupported size");
325  return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32;
326  }
327  llvm_unreachable("Unknown libcall function");
328 }
329 
330 /// True if an instruction is in tail position in its caller. Intended for
331 /// legalizing libcalls as tail calls when possible.
333  const Function &F = MI.getParent()->getParent()->getFunction();
334 
335  // Conservatively require the attributes of the call to match those of
336  // the return. Ignore NoAlias and NonNull because they don't affect the
337  // call sequence.
338  AttributeList CallerAttrs = F.getAttributes();
339  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
340  .removeAttribute(Attribute::NoAlias)
341  .removeAttribute(Attribute::NonNull)
342  .hasAttributes())
343  return false;
344 
345  // It's not safe to eliminate the sign / zero extension of the return value.
346  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
347  CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
348  return false;
349 
350  // Only tail call if the following instruction is a standard return.
351  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
352  MachineInstr *Next = MI.getNextNode();
353  if (!Next || TII.isTailCall(*Next) || !Next->isReturn())
354  return false;
355 
356  return true;
357 }
358 
361  const CallLowering::ArgInfo &Result,
363  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
364  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
365  const char *Name = TLI.getLibcallName(Libcall);
366 
368  Info.CallConv = TLI.getLibcallCallingConv(Libcall);
369  Info.Callee = MachineOperand::CreateES(Name);
370  Info.OrigRet = Result;
371  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
372  if (!CLI.lowerCall(MIRBuilder, Info))
374 
376 }
377 
378 // Useful for libcalls where all operands have the same type.
381  Type *OpType) {
382  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
383 
385  for (unsigned i = 1; i < MI.getNumOperands(); i++)
386  Args.push_back({MI.getOperand(i).getReg(), OpType});
387  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
388  Args);
389 }
390 
393  MachineInstr &MI) {
394  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
395  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
396 
398  // Add all the args, except for the last which is an imm denoting 'tail'.
399  for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) {
400  Register Reg = MI.getOperand(i).getReg();
401 
402  // Need derive an IR type for call lowering.
403  LLT OpLLT = MRI.getType(Reg);
404  Type *OpTy = nullptr;
405  if (OpLLT.isPointer())
406  OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
407  else
408  OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
409  Args.push_back({Reg, OpTy});
410  }
411 
412  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
413  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
415  RTLIB::Libcall RTLibcall;
416  switch (ID) {
417  case Intrinsic::memcpy:
418  RTLibcall = RTLIB::MEMCPY;
419  break;
420  case Intrinsic::memset:
421  RTLibcall = RTLIB::MEMSET;
422  break;
423  case Intrinsic::memmove:
424  RTLibcall = RTLIB::MEMMOVE;
425  break;
426  default:
428  }
429  const char *Name = TLI.getLibcallName(RTLibcall);
430 
431  MIRBuilder.setInstr(MI);
432 
434  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
435  Info.Callee = MachineOperand::CreateES(Name);
437  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 &&
439 
440  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
441  if (!CLI.lowerCall(MIRBuilder, Info))
443 
444  if (Info.LoweredTailCall) {
445  assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
446  // We must have a return following the call to get past
447  // isLibCallInTailPosition.
448  assert(MI.getNextNode() && MI.getNextNode()->isReturn() &&
449  "Expected instr following MI to be a return?");
450 
451  // We lowered a tail call, so the call is now the return from the block.
452  // Delete the old return.
454  }
455 
457 }
458 
459 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
460  Type *FromType) {
461  auto ToMVT = MVT::getVT(ToType);
462  auto FromMVT = MVT::getVT(FromType);
463 
464  switch (Opcode) {
465  case TargetOpcode::G_FPEXT:
466  return RTLIB::getFPEXT(FromMVT, ToMVT);
467  case TargetOpcode::G_FPTRUNC:
468  return RTLIB::getFPROUND(FromMVT, ToMVT);
469  case TargetOpcode::G_FPTOSI:
470  return RTLIB::getFPTOSINT(FromMVT, ToMVT);
471  case TargetOpcode::G_FPTOUI:
472  return RTLIB::getFPTOUINT(FromMVT, ToMVT);
473  case TargetOpcode::G_SITOFP:
474  return RTLIB::getSINTTOFP(FromMVT, ToMVT);
475  case TargetOpcode::G_UITOFP:
476  return RTLIB::getUINTTOFP(FromMVT, ToMVT);
477  }
478  llvm_unreachable("Unsupported libcall function");
479 }
480 
483  Type *FromType) {
485  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
486  {{MI.getOperand(1).getReg(), FromType}});
487 }
488 
491  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
492  unsigned Size = LLTy.getSizeInBits();
493  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
494 
495  MIRBuilder.setInstr(MI);
496 
497  switch (MI.getOpcode()) {
498  default:
499  return UnableToLegalize;
500  case TargetOpcode::G_SDIV:
501  case TargetOpcode::G_UDIV:
502  case TargetOpcode::G_SREM:
503  case TargetOpcode::G_UREM:
504  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
505  Type *HLTy = IntegerType::get(Ctx, Size);
506  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
507  if (Status != Legalized)
508  return Status;
509  break;
510  }
511  case TargetOpcode::G_FADD:
512  case TargetOpcode::G_FSUB:
513  case TargetOpcode::G_FMUL:
514  case TargetOpcode::G_FDIV:
515  case TargetOpcode::G_FMA:
516  case TargetOpcode::G_FPOW:
517  case TargetOpcode::G_FREM:
518  case TargetOpcode::G_FCOS:
519  case TargetOpcode::G_FSIN:
520  case TargetOpcode::G_FLOG10:
521  case TargetOpcode::G_FLOG:
522  case TargetOpcode::G_FLOG2:
523  case TargetOpcode::G_FEXP:
524  case TargetOpcode::G_FEXP2:
525  case TargetOpcode::G_FCEIL:
526  case TargetOpcode::G_FFLOOR: {
527  if (Size > 64) {
528  LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
529  return UnableToLegalize;
530  }
531  Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
532  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
533  if (Status != Legalized)
534  return Status;
535  break;
536  }
537  case TargetOpcode::G_FPEXT: {
538  // FIXME: Support other floating point types (half, fp128 etc)
539  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
540  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
541  if (ToSize != 64 || FromSize != 32)
542  return UnableToLegalize;
545  if (Status != Legalized)
546  return Status;
547  break;
548  }
549  case TargetOpcode::G_FPTRUNC: {
550  // FIXME: Support other floating point types (half, fp128 etc)
551  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
552  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
553  if (ToSize != 32 || FromSize != 64)
554  return UnableToLegalize;
557  if (Status != Legalized)
558  return Status;
559  break;
560  }
561  case TargetOpcode::G_FPTOSI:
562  case TargetOpcode::G_FPTOUI: {
563  // FIXME: Support other types
564  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
565  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
566  if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
567  return UnableToLegalize;
569  MI, MIRBuilder,
570  ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
571  FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
572  if (Status != Legalized)
573  return Status;
574  break;
575  }
576  case TargetOpcode::G_SITOFP:
577  case TargetOpcode::G_UITOFP: {
578  // FIXME: Support other types
579  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
580  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
581  if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
582  return UnableToLegalize;
584  MI, MIRBuilder,
585  ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
586  FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
587  if (Status != Legalized)
588  return Status;
589  break;
590  }
591  }
592 
593  MI.eraseFromParent();
594  return Legalized;
595 }
596 
598  unsigned TypeIdx,
599  LLT NarrowTy) {
600  MIRBuilder.setInstr(MI);
601 
602  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
603  uint64_t NarrowSize = NarrowTy.getSizeInBits();
604 
605  switch (MI.getOpcode()) {
606  default:
607  return UnableToLegalize;
608  case TargetOpcode::G_IMPLICIT_DEF: {
609  // FIXME: add support for when SizeOp0 isn't an exact multiple of
610  // NarrowSize.
611  if (SizeOp0 % NarrowSize != 0)
612  return UnableToLegalize;
613  int NumParts = SizeOp0 / NarrowSize;
614 
615  SmallVector<Register, 2> DstRegs;
616  for (int i = 0; i < NumParts; ++i)
617  DstRegs.push_back(
618  MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
619 
620  Register DstReg = MI.getOperand(0).getReg();
621  if(MRI.getType(DstReg).isVector())
622  MIRBuilder.buildBuildVector(DstReg, DstRegs);
623  else
624  MIRBuilder.buildMerge(DstReg, DstRegs);
625  MI.eraseFromParent();
626  return Legalized;
627  }
628  case TargetOpcode::G_CONSTANT: {
629  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
630  const APInt &Val = MI.getOperand(1).getCImm()->getValue();
631  unsigned TotalSize = Ty.getSizeInBits();
632  unsigned NarrowSize = NarrowTy.getSizeInBits();
633  int NumParts = TotalSize / NarrowSize;
634 
635  SmallVector<Register, 4> PartRegs;
636  for (int I = 0; I != NumParts; ++I) {
637  unsigned Offset = I * NarrowSize;
638  auto K = MIRBuilder.buildConstant(NarrowTy,
639  Val.lshr(Offset).trunc(NarrowSize));
640  PartRegs.push_back(K.getReg(0));
641  }
642 
643  LLT LeftoverTy;
644  unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
645  SmallVector<Register, 1> LeftoverRegs;
646  if (LeftoverBits != 0) {
647  LeftoverTy = LLT::scalar(LeftoverBits);
648  auto K = MIRBuilder.buildConstant(
649  LeftoverTy,
650  Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
651  LeftoverRegs.push_back(K.getReg(0));
652  }
653 
654  insertParts(MI.getOperand(0).getReg(),
655  Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
656 
657  MI.eraseFromParent();
658  return Legalized;
659  }
660  case TargetOpcode::G_SEXT: {
661  if (TypeIdx != 0)
662  return UnableToLegalize;
663 
664  Register SrcReg = MI.getOperand(1).getReg();
665  LLT SrcTy = MRI.getType(SrcReg);
666 
667  // FIXME: support the general case where the requested NarrowTy may not be
668  // the same as the source type. E.g. s128 = sext(s32)
669  if ((SrcTy.getSizeInBits() != SizeOp0 / 2) ||
670  SrcTy.getSizeInBits() != NarrowTy.getSizeInBits()) {
671  LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n");
672  return UnableToLegalize;
673  }
674 
675  // Shift the sign bit of the low register through the high register.
676  auto ShiftAmt =
677  MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1);
678  auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt);
679  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)});
680  MI.eraseFromParent();
681  return Legalized;
682  }
683  case TargetOpcode::G_ZEXT: {
684  if (TypeIdx != 0)
685  return UnableToLegalize;
686 
687  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
688  uint64_t SizeOp1 = SrcTy.getSizeInBits();
689  if (SizeOp0 % SizeOp1 != 0)
690  return UnableToLegalize;
691 
692  // Generate a merge where the bottom bits are taken from the source, and
693  // zero everything else.
694  Register ZeroReg = MIRBuilder.buildConstant(SrcTy, 0).getReg(0);
695  unsigned NumParts = SizeOp0 / SizeOp1;
696  SmallVector<Register, 4> Srcs = {MI.getOperand(1).getReg()};
697  for (unsigned Part = 1; Part < NumParts; ++Part)
698  Srcs.push_back(ZeroReg);
699  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Srcs);
700  MI.eraseFromParent();
701  return Legalized;
702  }
703  case TargetOpcode::G_TRUNC: {
704  if (TypeIdx != 1)
705  return UnableToLegalize;
706 
707  uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
708  if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
709  LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
710  return UnableToLegalize;
711  }
712 
713  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
714  MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Unmerge.getReg(0));
715  MI.eraseFromParent();
716  return Legalized;
717  }
718 
719  case TargetOpcode::G_ADD: {
720  // FIXME: add support for when SizeOp0 isn't an exact multiple of
721  // NarrowSize.
722  if (SizeOp0 % NarrowSize != 0)
723  return UnableToLegalize;
724  // Expand in terms of carry-setting/consuming G_ADDE instructions.
725  int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
726 
727  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
728  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
729  extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
730 
731  Register CarryIn;
732  for (int i = 0; i < NumParts; ++i) {
733  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
735 
736  if (i == 0)
737  MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]);
738  else {
739  MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
740  Src2Regs[i], CarryIn);
741  }
742 
743  DstRegs.push_back(DstReg);
744  CarryIn = CarryOut;
745  }
746  Register DstReg = MI.getOperand(0).getReg();
747  if(MRI.getType(DstReg).isVector())
748  MIRBuilder.buildBuildVector(DstReg, DstRegs);
749  else
750  MIRBuilder.buildMerge(DstReg, DstRegs);
751  MI.eraseFromParent();
752  return Legalized;
753  }
754  case TargetOpcode::G_SUB: {
755  // FIXME: add support for when SizeOp0 isn't an exact multiple of
756  // NarrowSize.
757  if (SizeOp0 % NarrowSize != 0)
758  return UnableToLegalize;
759 
760  int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
761 
762  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
763  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
764  extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
765 
766  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
768  MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
769  {Src1Regs[0], Src2Regs[0]});
770  DstRegs.push_back(DstReg);
771  Register BorrowIn = BorrowOut;
772  for (int i = 1; i < NumParts; ++i) {
773  DstReg = MRI.createGenericVirtualRegister(NarrowTy);
774  BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
775 
776  MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
777  {Src1Regs[i], Src2Regs[i], BorrowIn});
778 
779  DstRegs.push_back(DstReg);
780  BorrowIn = BorrowOut;
781  }
782  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
783  MI.eraseFromParent();
784  return Legalized;
785  }
786  case TargetOpcode::G_MUL:
787  case TargetOpcode::G_UMULH:
788  return narrowScalarMul(MI, NarrowTy);
789  case TargetOpcode::G_EXTRACT:
790  return narrowScalarExtract(MI, TypeIdx, NarrowTy);
791  case TargetOpcode::G_INSERT:
792  return narrowScalarInsert(MI, TypeIdx, NarrowTy);
793  case TargetOpcode::G_LOAD: {
794  const auto &MMO = **MI.memoperands_begin();
795  Register DstReg = MI.getOperand(0).getReg();
796  LLT DstTy = MRI.getType(DstReg);
797  if (DstTy.isVector())
798  return UnableToLegalize;
799 
800  if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
801  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
802  auto &MMO = **MI.memoperands_begin();
803  MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
804  MIRBuilder.buildAnyExt(DstReg, TmpReg);
805  MI.eraseFromParent();
806  return Legalized;
807  }
808 
809  return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
810  }
811  case TargetOpcode::G_ZEXTLOAD:
812  case TargetOpcode::G_SEXTLOAD: {
813  bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
814  Register DstReg = MI.getOperand(0).getReg();
815  Register PtrReg = MI.getOperand(1).getReg();
816 
817  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
818  auto &MMO = **MI.memoperands_begin();
819  if (MMO.getSizeInBits() == NarrowSize) {
820  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
821  } else {
822  unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
823  : TargetOpcode::G_SEXTLOAD;
824  MIRBuilder.buildInstr(ExtLoad)
825  .addDef(TmpReg)
826  .addUse(PtrReg)
827  .addMemOperand(&MMO);
828  }
829 
830  if (ZExt)
831  MIRBuilder.buildZExt(DstReg, TmpReg);
832  else
833  MIRBuilder.buildSExt(DstReg, TmpReg);
834 
835  MI.eraseFromParent();
836  return Legalized;
837  }
838  case TargetOpcode::G_STORE: {
839  const auto &MMO = **MI.memoperands_begin();
840 
841  Register SrcReg = MI.getOperand(0).getReg();
842  LLT SrcTy = MRI.getType(SrcReg);
843  if (SrcTy.isVector())
844  return UnableToLegalize;
845 
846  int NumParts = SizeOp0 / NarrowSize;
847  unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
848  unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
849  if (SrcTy.isVector() && LeftoverBits != 0)
850  return UnableToLegalize;
851 
852  if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
853  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
854  auto &MMO = **MI.memoperands_begin();
855  MIRBuilder.buildTrunc(TmpReg, SrcReg);
856  MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
857  MI.eraseFromParent();
858  return Legalized;
859  }
860 
861  return reduceLoadStoreWidth(MI, 0, NarrowTy);
862  }
863  case TargetOpcode::G_SELECT:
864  return narrowScalarSelect(MI, TypeIdx, NarrowTy);
865  case TargetOpcode::G_AND:
866  case TargetOpcode::G_OR:
867  case TargetOpcode::G_XOR: {
868  // Legalize bitwise operation:
869  // A = BinOp<Ty> B, C
870  // into:
871  // B1, ..., BN = G_UNMERGE_VALUES B
872  // C1, ..., CN = G_UNMERGE_VALUES C
873  // A1 = BinOp<Ty/N> B1, C2
874  // ...
875  // AN = BinOp<Ty/N> BN, CN
876  // A = G_MERGE_VALUES A1, ..., AN
877  return narrowScalarBasic(MI, TypeIdx, NarrowTy);
878  }
879  case TargetOpcode::G_SHL:
880  case TargetOpcode::G_LSHR:
881  case TargetOpcode::G_ASHR:
882  return narrowScalarShift(MI, TypeIdx, NarrowTy);
883  case TargetOpcode::G_CTLZ:
884  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
885  case TargetOpcode::G_CTTZ:
886  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
887  case TargetOpcode::G_CTPOP:
888  if (TypeIdx != 0)
889  return UnableToLegalize; // TODO
890 
891  Observer.changingInstr(MI);
892  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
893  Observer.changedInstr(MI);
894  return Legalized;
895  case TargetOpcode::G_INTTOPTR:
896  if (TypeIdx != 1)
897  return UnableToLegalize;
898 
899  Observer.changingInstr(MI);
900  narrowScalarSrc(MI, NarrowTy, 1);
901  Observer.changedInstr(MI);
902  return Legalized;
903  case TargetOpcode::G_PTRTOINT:
904  if (TypeIdx != 0)
905  return UnableToLegalize;
906 
907  Observer.changingInstr(MI);
908  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
909  Observer.changedInstr(MI);
910  return Legalized;
911  case TargetOpcode::G_PHI: {
912  unsigned NumParts = SizeOp0 / NarrowSize;
913  SmallVector<Register, 2> DstRegs;
915  DstRegs.resize(NumParts);
916  SrcRegs.resize(MI.getNumOperands() / 2);
917  Observer.changingInstr(MI);
918  for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
919  MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
920  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
921  extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
922  SrcRegs[i / 2]);
923  }
924  MachineBasicBlock &MBB = *MI.getParent();
925  MIRBuilder.setInsertPt(MBB, MI);
926  for (unsigned i = 0; i < NumParts; ++i) {
927  DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
928  MachineInstrBuilder MIB =
929  MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
930  for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
931  MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
932  }
933  MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
934  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
935  Observer.changedInstr(MI);
936  MI.eraseFromParent();
937  return Legalized;
938  }
939  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
940  case TargetOpcode::G_INSERT_VECTOR_ELT: {
941  if (TypeIdx != 2)
942  return UnableToLegalize;
943 
944  int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
945  Observer.changingInstr(MI);
946  narrowScalarSrc(MI, NarrowTy, OpIdx);
947  Observer.changedInstr(MI);
948  return Legalized;
949  }
950  case TargetOpcode::G_ICMP: {
951  uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
952  if (NarrowSize * 2 != SrcSize)
953  return UnableToLegalize;
954 
955  Observer.changingInstr(MI);
956  Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
957  Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
958  MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg());
959 
960  Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
961  Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
962  MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg());
963 
964  CmpInst::Predicate Pred =
965  static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
966  LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
967 
968  if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
969  MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
970  MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
971  MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
972  MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
973  MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero);
974  } else {
975  MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
976  MachineInstrBuilder CmpHEQ =
977  MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
979  ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
980  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH);
981  }
982  Observer.changedInstr(MI);
983  MI.eraseFromParent();
984  return Legalized;
985  }
986  case TargetOpcode::G_SEXT_INREG: {
987  if (TypeIdx != 0)
988  return UnableToLegalize;
989 
990  if (!MI.getOperand(2).isImm())
991  return UnableToLegalize;
992  int64_t SizeInBits = MI.getOperand(2).getImm();
993 
994  // So long as the new type has more bits than the bits we're extending we
995  // don't need to break it apart.
996  if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
997  Observer.changingInstr(MI);
998  // We don't lose any non-extension bits by truncating the src and
999  // sign-extending the dst.
1000  MachineOperand &MO1 = MI.getOperand(1);
1001  auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1.getReg());
1002  MO1.setReg(TruncMIB->getOperand(0).getReg());
1003 
1004  MachineOperand &MO2 = MI.getOperand(0);
1005  Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1007  MIRBuilder.buildInstr(TargetOpcode::G_SEXT, {MO2.getReg()}, {DstExt});
1008  MO2.setReg(DstExt);
1009  Observer.changedInstr(MI);
1010  return Legalized;
1011  }
1012 
1013  // Break it apart. Components below the extension point are unmodified. The
1014  // component containing the extension point becomes a narrower SEXT_INREG.
1015  // Components above it are ashr'd from the component containing the
1016  // extension point.
1017  if (SizeOp0 % NarrowSize != 0)
1018  return UnableToLegalize;
1019  int NumParts = SizeOp0 / NarrowSize;
1020 
1021  // List the registers where the destination will be scattered.
1022  SmallVector<Register, 2> DstRegs;
1023  // List the registers where the source will be split.
1024  SmallVector<Register, 2> SrcRegs;
1025 
1026  // Create all the temporary registers.
1027  for (int i = 0; i < NumParts; ++i) {
1028  Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1029 
1030  SrcRegs.push_back(SrcReg);
1031  }
1032 
1033  // Explode the big arguments into smaller chunks.
1034  MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1).getReg());
1035 
1036  Register AshrCstReg =
1037  MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1038  ->getOperand(0)
1039  .getReg();
1040  Register FullExtensionReg = 0;
1041  Register PartialExtensionReg = 0;
1042 
1043  // Do the operation on each small part.
1044  for (int i = 0; i < NumParts; ++i) {
1045  if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1046  DstRegs.push_back(SrcRegs[i]);
1047  else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1048  assert(PartialExtensionReg &&
1049  "Expected to visit partial extension before full");
1050  if (FullExtensionReg) {
1051  DstRegs.push_back(FullExtensionReg);
1052  continue;
1053  }
1054  DstRegs.push_back(MIRBuilder
1055  .buildInstr(TargetOpcode::G_ASHR, {NarrowTy},
1056  {PartialExtensionReg, AshrCstReg})
1057  ->getOperand(0)
1058  .getReg());
1059  FullExtensionReg = DstRegs.back();
1060  } else {
1061  DstRegs.push_back(
1062  MIRBuilder
1063  .buildInstr(
1064  TargetOpcode::G_SEXT_INREG, {NarrowTy},
1065  {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1066  ->getOperand(0)
1067  .getReg());
1068  PartialExtensionReg = DstRegs.back();
1069  }
1070  }
1071 
1072  // Gather the destination registers into the final destination.
1073  Register DstReg = MI.getOperand(0).getReg();
1074  MIRBuilder.buildMerge(DstReg, DstRegs);
1075  MI.eraseFromParent();
1076  return Legalized;
1077  }
1078  }
1079 }
1080 
1081 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1082  unsigned OpIdx, unsigned ExtOpcode) {
1083  MachineOperand &MO = MI.getOperand(OpIdx);
1084  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
1085  MO.setReg(ExtB->getOperand(0).getReg());
1086 }
1087 
1088 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1089  unsigned OpIdx) {
1090  MachineOperand &MO = MI.getOperand(OpIdx);
1091  auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
1092  {MO.getReg()});
1093  MO.setReg(ExtB->getOperand(0).getReg());
1094 }
1095 
// Widen the def at operand \p OpIdx: MI now defines a fresh register of
// \p WideTy, and a \p TruncOpcode instruction converts the wide value back
// into the original (narrow) destination so existing users are unaffected.
// Note the builder argument order: the original register is the *def* of the
// conversion, DstExt its source.
//
// NOTE(review): the extraction elided one line here (gutter skips 1100) --
// upstream advances the builder insertion point past MI so the conversion is
// emitted after the widened instruction; restore from upstream before
// compiling.
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
  MO.setReg(DstExt);
}
1104 
// Narrow the def at operand \p OpIdx: MI now defines a fresh register of
// \p NarrowTy, and an \p ExtOpcode instruction extends the narrow value back
// into the original (wide) destination so existing users are unaffected.
// As in widenScalarDst, the original register is the def of the extension.
//
// NOTE(review): the extraction elided one line here (gutter skips 1109) --
// upstream advances the builder insertion point past MI so the extension is
// emitted after the narrowed instruction; restore from upstream before
// compiling.
void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
  MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
  MO.setReg(DstTrunc);
}
1113 
// Grow the vector def at operand \p OpIdx: MI now defines a fresh register of
// the wider vector type \p WideTy, and a G_EXTRACT at offset 0 recovers the
// original (smaller) destination value from it.
//
// NOTE(review): the extraction elided one line here (gutter skips 1118) --
// upstream advances the builder insertion point past MI so the extract is
// emitted after the modified instruction; restore from upstream before
// compiling.
void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
  MO.setReg(DstExt);
}
1122 
// Pad the vector use at operand \p OpIdx with undef elements so \p MI reads a
// value of the wider vector type \p MoreTy.
//
// NOTE(review): the extraction elided one line here (gutter skips 1135) --
// presumably the declaration of the `Parts` vector used below; restore it
// from upstream before compiling.
void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);

  LLT OldTy = MRI.getType(MO.getReg());
  unsigned OldElts = OldTy.getNumElements();
  unsigned NewElts = MoreTy.getNumElements();

  unsigned NumParts = NewElts / OldElts;

  // Use concat_vectors if the result is a multiple of the number of elements.
  if (NumParts * OldElts == NewElts) {
    // Original value followed by NumParts - 1 undef vectors of the old type
    // concatenates to exactly MoreTy.
    Parts.push_back(MO.getReg());

    Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
    for (unsigned I = 1; I != NumParts; ++I)
      Parts.push_back(ImpDef);

    auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
    MO.setReg(Concat.getReg(0));
    return;
  }

  // Otherwise insert the original value at offset 0 of an undef of the wider
  // type.
  Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
  Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
  MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
  MO.setReg(MoreReg);
}
1152 
// Widen the sources of a G_MERGE_VALUES (only TypeIdx 1 is supported) to
// \p WideTy. If WideTy covers the whole destination the parts are packed
// directly with zext/shl/or; otherwise the operands are re-chunked through
// their GCD type and re-merged at WideTy granularity (see examples below).
//
// NOTE(review): the extraction elided the return-type line (LegalizeResult)
// above this definition and one interior line (gutter skips 1229, likely a
// SmallVector declaration); restore from upstream before compiling.
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src1);
  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = SrcTy.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  // Number of WideTy pieces needed to cover the destination (rounded up).
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    // Directly pack the bits in the target type.
    // Start from the least-significant part zero-extended to WideTy...
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register SrcReg = MI.getOperand(I).getReg();
      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

      // Write directly into DstReg on the last iteration when no final
      // truncate/conversion is needed.
      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
        MRI.createGenericVirtualRegister(WideTy);

      // ...then shift each later part into position and OR it in.
      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Unmerge the original values to the GCD type, and recombine to the next
  // multiple greater than the original type.
  //
  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
  // %12:_(s12) = G_MERGE_VALUES %10, %11
  //
  // Padding with undef if necessary:
  //
  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
  // %7:_(s2) = G_IMPLICIT_DEF
  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
  // %10:_(s12) = G_MERGE_VALUES %8, %9

  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
  LLT GCDTy = LLT::scalar(GCD);

  SmallVector<Register, 8> NewMergeRegs;
  SmallVector<Register, 8> Unmerges;
  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);

  // Decompose the original operands if they don't evenly divide.
  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    if (GCD == SrcSize) {
      Unmerges.push_back(SrcReg);
    } else {
      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
        Unmerges.push_back(Unmerge.getReg(J));
    }
  }

  // Pad with undef to the next size that is a multiple of the requested size.
  // NOTE(review): this bound mixes units -- Unmerges.size() counts GCD-sized
  // pieces while NumMerge * WideSize is in bits -- so it appears to over-pad.
  // The merge loop below only consumes PartsPerGCD entries per merge, so the
  // extra undefs look harmless but wasteful; confirm against upstream.
  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
      Unmerges.push_back(UndefReg);
  }

  const int PartsPerGCD = WideSize / GCD;

  // Build merges of each piece.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
    NewMergeRegs.push_back(Merge.getReg(0));
  }

  // A truncate may be necessary if the requested type doesn't evenly divide the
  // original result type.
  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
    MIRBuilder.buildMerge(DstReg, NewMergeRegs);
  } else {
    auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
  }

  MI.eraseFromParent();
  return Legalized;
}
1274 
// Widen the destinations of a scalar G_UNMERGE_VALUES (only TypeIdx 0 is
// supported) to \p WideTy: repack the scalar source so each piece sits at a
// WideTy-aligned offset, then widen each destination individually.
//
// NOTE(review): the extraction elided the return-type line (LegalizeResult)
// above this definition.
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  unsigned NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (!SrcTy.isScalar())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  // New source type: one WideTy-sized lane per destination.
  unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
  LLT NewSrcTy = LLT::scalar(NewSrcSize);
  // Per-lane padding between the widened and the original piece size.
  unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();

  auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);

  // OR in shifted copies of the running value so that the I-th original piece
  // ends up at bit offset I * WideSize of the new source.
  for (unsigned I = 1; I != NumDst; ++I) {
    auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
    auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
    WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
  }

  Observer.changingInstr(MI);

  // Unmerge from the repacked source, widening every destination register.
  MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
  for (unsigned I = 0; I != NumDst; ++I)
    widenScalarDst(MI, WideTy, I);

  Observer.changedInstr(MI);

  return Legalized;
}
1314 
// Widen a G_EXTRACT. TypeIdx 0 widens the extracted result (lowered to a
// shift + trunc of the source); TypeIdx 1 widens a scalar or vector source.
//
// NOTE(review): the extraction elided the return-type line (LegalizeResult)
// above this definition and the guard condition at gutter line 1334 (see the
// inline note below).
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(DstReg);
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;

    SrcOp Src(SrcReg);
    if (SrcTy.isPointer()) {
      // Extracts from pointers can be handled only if they are really just
      // simple integers.
      const DataLayout &DL = MIRBuilder.getDataLayout();
      // NOTE(review): the extraction elided the `if` guarding this return
      // (upstream bails out only for non-integral address spaces); as written
      // this returns unconditionally and the ptrtoint below is dead. Restore
      // the guard from upstream before compiling.
      return UnableToLegalize;

      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
      SrcTy = SrcAsIntTy;
    }

    if (DstTy.isPointer())
      return UnableToLegalize;

    if (Offset == 0) {
      // Avoid a shift in the degenerate case.
      MIRBuilder.buildTrunc(DstReg,
                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
      MI.eraseFromParent();
      return Legalized;
    }

    // Do a shift in the source type.
    LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
      // NOTE(review): this condition duplicates the `if` above, so the branch
      // is unreachable dead code -- likely a copy-paste slip; confirm intent
      // against upstream.
      return UnableToLegalize;

    auto LShr = MIRBuilder.buildLShr(
        ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
    MIRBuilder.buildTrunc(DstReg, LShr);
    MI.eraseFromParent();
    return Legalized;
  }

  // TypeIdx 1, scalar source: just any-extend it; the extract offset is
  // unchanged because the low bits keep their positions.
  if (SrcTy.isScalar()) {
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (!SrcTy.isVector())
    return UnableToLegalize;

  // Vector source: only element extracts at element-aligned offsets are
  // handled.
  if (DstTy != SrcTy.getElementType())
    return UnableToLegalize;

  if (Offset % SrcTy.getScalarSizeInBits() != 0)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

  // Rescale the bit offset to the widened element size.
  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                          Offset);
  widenScalarDst(MI, WideTy.getScalarType(), 0);
  Observer.changedInstr(MI);
  return Legalized;
}
1393 
// Widen a G_INSERT (only TypeIdx 0 is supported): any-extend the container
// operand and widen the destination; the inserted value and bit offset are
// left unchanged.
//
// NOTE(review): the extraction elided the return-type line (LegalizeResult)
// above this definition.
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                   LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;
  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  widenScalarDst(MI, WideTy);
  Observer.changedInstr(MI);
  return Legalized;
}
1405 
// Legalize \p MI by promoting the scalar type selected by \p TypeIdx to
// \p WideTy. Returns Legalized on success and UnableToLegalize for
// opcode/TypeIdx combinations that are not handled here.
//
// NOTE(review): this listing is an extracted dump; the gutter numbering shows
// several elided physical lines in this function (the LegalizeResult
// return-type line, builder insertion-point updates, and the lines defining
// `Mask`, the overflow compare, `Ctx`, and the APFloat conversions). Restore
// them from the upstream LegalizerHelper.cpp before compiling; elision points
// are flagged inline below.
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO: {
    if (TypeIdx == 1)
      return UnableToLegalize; // TODO
    // Zero-extend both inputs so a carry/borrow out of the original width is
    // observable in the wide type.
    auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(2).getReg()});
    auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
                                         {MI.getOperand(3).getReg()});
    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
                          ? TargetOpcode::G_ADD
                          : TargetOpcode::G_SUB;
    // Do the arithmetic in the larger type.
    auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
    LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
    // NOTE(review): the statement defining `Mask` (an all-ones value of
    // OrigTy's width) was elided by the extraction.
    auto AndOp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {WideTy},
        {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
    // There is no overflow if the AndOp is the same as NewOp.
    // NOTE(review): the start of the compare defining the overflow result was
    // elided; only its trailing argument survives below.
        AndOp);
    // Now trunc the NewOp to the original result.
    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    if (TypeIdx == 0) {
      // Only the count result needs widening; it is computed from the
      // original source width.
      Observer.changingInstr(MI);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    Register SrcReg = MI.getOperand(1).getReg();

    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
    LLT CurTy = MRI.getType(SrcReg);
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero. This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildOr(
          WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
    }

    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZs
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (Difference in widety and current ty).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {WideTy},
          {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
    }

    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP: {
    Observer.changingInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();

    Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

    MI.getOperand(0).setReg(DstExt);
    // NOTE(review): an insertion-point update was elided here by the
    // extraction (gutter skips a line).

    // The swapped bytes land in the high part of the wide register; shift
    // them back down before truncating.
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
    MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
        .addDef(ShrReg)
        .addUse(DstExt)
        .addUse(ShiftAmtReg);

    MIRBuilder.buildTrunc(DstReg, ShrReg);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BITREVERSE: {
    Observer.changingInstr(MI);

    Register DstReg = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();

    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    MI.getOperand(0).setReg(DstExt);
    // NOTE(review): an insertion-point update was elided here by the
    // extraction (gutter skips a line).

    // The reversed bits land in the high part of the wide register; shift
    // them back down before truncating.
    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
    auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
    MIRBuilder.buildTrunc(DstReg, Shift);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
    // Signed operations need correct high bits: sign-extend both inputs.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      // Extend to match the shift kind so the bits shifted in from the top of
      // the widened value are correct.
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
        TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

      widenScalarSrc(MI, WideTy, 1, CvtOp);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    // Unsigned operations need zeroed high bits: zero-extend both inputs.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to the
      // original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    // Signed source: sign-extend to preserve the value.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_UITOFP:
    if (TypeIdx != 1)
      return UnableToLegalize;
    // Unsigned source: zero-extend to preserve the value.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (!isPowerOf2_32(Ty.getSizeInBits()))
      return UnableToLegalize;

    Observer.changingInstr(MI);

    // Stored i1 values must stay 0/1, so zero-extend them; other types can be
    // any-extended since only the low bits are stored.
    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
      TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
    widenScalarSrc(MI, WideTy, 0, ExtType);

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    // NOTE(review): the declaration of `Ctx` (the LLVMContext) was elided
    // here by the extraction.
    const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    // NOTE(review): the declaration of `Ctx` and the leading lines of the two
    // APFloat `convert` calls in the switch below were elided by the
    // extraction; only their trailing arguments survive.
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
            &LosesInfo);
      break;
    case 64:
            &LosesInfo);
      break;
    default:
      return UnableToLegalize;
    }

    assert(!LosesInfo && "extend should always be lossless");

    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    // The condition's high bits matter to the branch, so use the target's
    // boolean extension.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      // Compares need correct high bits; pick the extension matching the
      // predicate's signedness.
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_GEP:
    assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
    Observer.changingInstr(MI);
    // Pointer offsets are signed: sign-extend to keep negative offsets.
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    // Extend each incoming value in its predecessor block, just before that
    // block's terminator.
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      // Widen the vector's element type along with the result.
      widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
                                     WideTy.getSizeInBits()),
                     1, TargetOpcode::G_SEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    // FP operations: extend every source with FPEXT and truncate the result
    // with FPTRUNC.
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_BUILD_VECTOR: {
    Observer.changingInstr(MI);

    const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);

    // Avoid changing the result vector type if the source element type was
    // requested.
    if (TypeIdx == 1) {
      auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
      MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
    } else {
      widenScalarDst(MI, WideTy, 0);
    }

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG:
    if (TypeIdx != 0)
      return UnableToLegalize;

    // The extension point (operand 2) is unchanged; only the value width
    // grows, which G_SEXT_INREG tolerates.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
}
1882 
// NOTE(review): this chunk is a doxygen/HTML text rendering of
// llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp; each line still carries
// the upstream file's line number, and lines whose entire content was
// hyperlinked were dropped by the extraction, leaving a few statements
// visibly truncated below.  Code bytes are deliberately left untouched.
//
// lower(): expand an instruction whose LegalizeAction is Lower into an
// equivalent sequence of simpler generic (G_*) instructions built through
// MIRBuilder.  Returns Legalized on success and UnableToLegalize when no
// lowering is implemented for this opcode/type combination.  The dropped
// line 1883 presumably held the "LegalizerHelper::LegalizeResult" return
// type -- confirm against upstream.
1884 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
1885  using namespace TargetOpcode;
1886  MIRBuilder.setInstr(MI);
1887 
1888  switch(MI.getOpcode()) {
1889  default:
1890  return UnableToLegalize;
1891  case TargetOpcode::G_SREM:
1892  case TargetOpcode::G_UREM: {
// Remainder is lowered as x - (x / y) * y, using the signedness-matching
// division opcode.
1893  Register QuotReg = MRI.createGenericVirtualRegister(Ty);
1894  MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
1895  .addDef(QuotReg)
1896  .addUse(MI.getOperand(1).getReg())
1897  .addUse(MI.getOperand(2).getReg());
1898 
1899  Register ProdReg = MRI.createGenericVirtualRegister(Ty);
1900  MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
// NOTE(review): line 1901 was dropped by the extraction; it presumably held
// the opening of the final subtract (buildSub(dst, dividend, ...)) whose
// trailing "ProdReg)" argument survives on the next line -- confirm upstream.
1902  ProdReg);
1903  MI.eraseFromParent();
1904  return Legalized;
1905  }
1906  case TargetOpcode::G_SADDO:
1907  case TargetOpcode::G_SSUBO:
1908  return lowerSADDO_SSUBO(MI);
1909  case TargetOpcode::G_SMULO:
1910  case TargetOpcode::G_UMULO: {
1911  // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
1912  // result.
1913  Register Res = MI.getOperand(0).getReg();
1914  Register Overflow = MI.getOperand(1).getReg();
1915  Register LHS = MI.getOperand(2).getReg();
1916  Register RHS = MI.getOperand(3).getReg();
1917 
1918  MIRBuilder.buildMul(Res, LHS, RHS);
1919 
1920  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
1921  ? TargetOpcode::G_SMULH
1922  : TargetOpcode::G_UMULH;
1923 
1924  Register HiPart = MRI.createGenericVirtualRegister(Ty);
1925  MIRBuilder.buildInstr(Opcode)
1926  .addDef(HiPart)
1927  .addUse(LHS)
1928  .addUse(RHS);
1929 
1930  Register Zero = MRI.createGenericVirtualRegister(Ty);
1931  MIRBuilder.buildConstant(Zero, 0);
1932 
1933  // For *signed* multiply, overflow is detected by checking:
1934  // (hi != (lo >> bitwidth-1))
1935  if (Opcode == TargetOpcode::G_SMULH) {
1936  Register Shifted = MRI.createGenericVirtualRegister(Ty);
1937  Register ShiftAmt = MRI.createGenericVirtualRegister(Ty);
1938  MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
1939  MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
1940  .addDef(Shifted)
1941  .addUse(Res)
1942  .addUse(ShiftAmt);
1943  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
1944  } else {
// Unsigned multiply overflows iff the high half of the product is nonzero.
1945  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
1946  }
1947  MI.eraseFromParent();
1948  return Legalized;
1949  }
1950  case TargetOpcode::G_FNEG: {
1951  // TODO: Handle vector types once we are able to
1952  // represent them.
1953  if (Ty.isVector())
1954  return UnableToLegalize;
1955  Register Res = MI.getOperand(0).getReg();
1956  Type *ZeroTy;
// NOTE(review): line 1957 was dropped by the extraction; "Ctx" used below is
// otherwise undeclared -- presumably an LLVMContext& obtained from the
// enclosing function.  Confirm against upstream.
// G_FNEG x is lowered as (-0.0) - x; first pick the IR FP type matching the
// bit width so a ConstantFP zero can be materialized.
1958  switch (Ty.getSizeInBits()) {
1959  case 16:
1960  ZeroTy = Type::getHalfTy(Ctx);
1961  break;
1962  case 32:
1963  ZeroTy = Type::getFloatTy(Ctx);
1964  break;
1965  case 64:
1966  ZeroTy = Type::getDoubleTy(Ctx);
1967  break;
1968  case 128:
1969  ZeroTy = Type::getFP128Ty(Ctx);
1970  break;
1971  default:
1972  llvm_unreachable("unexpected floating-point type");
1973  }
1974  ConstantFP &ZeroForNegation =
1975  *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
1976  auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
1977  Register SubByReg = MI.getOperand(1).getReg();
1978  Register ZeroReg = Zero->getOperand(0).getReg();
1979  MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
1980  MI.getFlags());
1981  MI.eraseFromParent();
1982  return Legalized;
1983  }
1984  case TargetOpcode::G_FSUB: {
1985  // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
1986  // First, check if G_FNEG is marked as Lower. If so, we may
1987  // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
1988  if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
1989  return UnableToLegalize;
1990  Register Res = MI.getOperand(0).getReg();
1991  Register LHS = MI.getOperand(1).getReg();
1992  Register RHS = MI.getOperand(2).getReg();
1993  Register Neg = MRI.createGenericVirtualRegister(Ty);
1994  MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
1995  MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags());
1996  MI.eraseFromParent();
1997  return Legalized;
1998  }
1999  case TargetOpcode::G_FMAD:
2000  return lowerFMad(MI);
2001  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
// Lowered to a plain cmpxchg plus an explicit compare of the loaded old
// value against the expected value to recompute the success flag.
2002  Register OldValRes = MI.getOperand(0).getReg();
2003  Register SuccessRes = MI.getOperand(1).getReg();
2004  Register Addr = MI.getOperand(2).getReg();
2005  Register CmpVal = MI.getOperand(3).getReg();
2006  Register NewVal = MI.getOperand(4).getReg();
2007  MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
2008  **MI.memoperands_begin());
2009  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
2010  MI.eraseFromParent();
2011  return Legalized;
2012  }
2013  case TargetOpcode::G_LOAD:
2014  case TargetOpcode::G_SEXTLOAD:
2015  case TargetOpcode::G_ZEXTLOAD: {
2016  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2017  Register DstReg = MI.getOperand(0).getReg();
2018  Register PtrReg = MI.getOperand(1).getReg();
2019  LLT DstTy = MRI.getType(DstReg);
2020  auto &MMO = **MI.memoperands_begin();
2021 
2022  if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
2023  if (MI.getOpcode() == TargetOpcode::G_LOAD) {
2024  // This load needs splitting into power of 2 sized loads.
2025  if (DstTy.isVector())
2026  return UnableToLegalize;
2027  if (isPowerOf2_32(DstTy.getSizeInBits()))
2028  return UnableToLegalize; // Don't know what we're being asked to do.
2029 
2030  // Our strategy here is to generate anyextending loads for the smaller
2031  // types up to next power-2 result type, and then combine the two larger
2032  // result values together, before truncating back down to the non-pow-2
2033  // type.
2034  // E.g. v1 = i24 load =>
2035  // v2 = i32 load (2 byte)
2036  // v3 = i32 load (1 byte)
2037  // v4 = i32 shl v3, 16
2038  // v5 = i32 or v4, v2
2039  // v1 = i24 trunc v5
2040  // By doing this we generate the correct truncate which should get
2041  // combined away as an artifact with a matching extend.
2042  uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
2043  uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
2044 
// NOTE(review): line 2045 was dropped by the extraction; "MF" used below is
// otherwise undeclared -- presumably a MachineFunction& reference.  Confirm
// against upstream.
2046  MachineMemOperand *LargeMMO =
2047  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2048  MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
2049  &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2050 
2051  LLT PtrTy = MRI.getType(PtrReg);
2052  unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
2053  LLT AnyExtTy = LLT::scalar(AnyExtSize);
2054  Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2055  Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2056  auto LargeLoad =
2057  MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO);
2058 
2059  auto OffsetCst =
2060  MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
2061  Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
2062  auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
2063  auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
2064  *SmallMMO);
2065 
2066  auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
2067  auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
2068  auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
2069  MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
2070  MI.eraseFromParent();
2071  return Legalized;
2072  }
// Ext-loads whose memory size equals the result size degrade to plain loads.
2073  MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
2074  MI.eraseFromParent();
2075  return Legalized;
2076  }
2077 
// Result is wider than memory: load at memory width, then extend with the
// extension kind implied by the opcode (any/sext/zext).
2078  if (DstTy.isScalar()) {
2079  Register TmpReg =
2080  MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
2081  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
2082  switch (MI.getOpcode()) {
2083  default:
2084  llvm_unreachable("Unexpected opcode");
2085  case TargetOpcode::G_LOAD:
2086  MIRBuilder.buildAnyExt(DstReg, TmpReg);
2087  break;
2088  case TargetOpcode::G_SEXTLOAD:
2089  MIRBuilder.buildSExt(DstReg, TmpReg);
2090  break;
2091  case TargetOpcode::G_ZEXTLOAD:
2092  MIRBuilder.buildZExt(DstReg, TmpReg);
2093  break;
2094  }
2095  MI.eraseFromParent();
2096  return Legalized;
2097  }
2098 
2099  return UnableToLegalize;
2100  }
2101  case TargetOpcode::G_STORE: {
2102  // Lower a non-power of 2 store into multiple pow-2 stores.
2103  // E.g. split an i24 store into an i16 store + i8 store.
2104  // We do this by first extending the stored value to the next largest power
2105  // of 2 type, and then using truncating stores to store the components.
2106  // By doing this, likewise with G_LOAD, generate an extend that can be
2107  // artifact-combined away instead of leaving behind extracts.
2108  Register SrcReg = MI.getOperand(0).getReg();
2109  Register PtrReg = MI.getOperand(1).getReg();
2110  LLT SrcTy = MRI.getType(SrcReg);
2111  MachineMemOperand &MMO = **MI.memoperands_begin();
2112  if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
2113  return UnableToLegalize;
2114  if (SrcTy.isVector())
2115  return UnableToLegalize;
2116  if (isPowerOf2_32(SrcTy.getSizeInBits()))
2117  return UnableToLegalize; // Don't know what we're being asked to do.
2118 
2119  // Extend to the next pow-2.
2120  const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
2121  auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
2122 
2123  // Obtain the smaller value by shifting away the larger value.
2124  uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
2125  uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
2126  auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
2127  auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
2128 
2129  // Generate the GEP and truncating stores.
2130  LLT PtrTy = MRI.getType(PtrReg);
2131  auto OffsetCst =
2132  MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
2133  Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
2134  auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
2135 
// NOTE(review): line 2136 was dropped by the extraction; "MF" used below is
// otherwise undeclared here as well -- presumably a MachineFunction&
// reference.  Confirm against upstream.
2137  MachineMemOperand *LargeMMO =
2138  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2139  MachineMemOperand *SmallMMO =
2140  MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2141  MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
2142  MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
2143  MI.eraseFromParent();
2144  return Legalized;
2145  }
2146  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2147  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2148  case TargetOpcode::G_CTLZ:
2149  case TargetOpcode::G_CTTZ:
2150  case TargetOpcode::G_CTPOP:
2151  return lowerBitCount(MI, TypeIdx, Ty);
2152  case G_UADDO: {
// Carry-out is recomputed as an unsigned compare: Res < RHS iff the add
// wrapped.
2153  Register Res = MI.getOperand(0).getReg();
2154  Register CarryOut = MI.getOperand(1).getReg();
2155  Register LHS = MI.getOperand(2).getReg();
2156  Register RHS = MI.getOperand(3).getReg();
2157 
2158  MIRBuilder.buildAdd(Res, LHS, RHS);
2159  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
2160 
2161  MI.eraseFromParent();
2162  return Legalized;
2163  }
2164  case G_UADDE: {
// Add with carry-in: zero-extend the carry, add it on, and recompute the
// carry-out as Res < LHS.
2165  Register Res = MI.getOperand(0).getReg();
2166  Register CarryOut = MI.getOperand(1).getReg();
2167  Register LHS = MI.getOperand(2).getReg();
2168  Register RHS = MI.getOperand(3).getReg();
2169  Register CarryIn = MI.getOperand(4).getReg();
2170 
2171  Register TmpRes = MRI.createGenericVirtualRegister(Ty);
2172  Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
2173 
2174  MIRBuilder.buildAdd(TmpRes, LHS, RHS);
2175  MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
2176  MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
2177  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
2178 
2179  MI.eraseFromParent();
2180  return Legalized;
2181  }
2182  case G_USUBO: {
// Borrow-out is LHS < RHS (unsigned).
2183  Register Res = MI.getOperand(0).getReg();
2184  Register BorrowOut = MI.getOperand(1).getReg();
2185  Register LHS = MI.getOperand(2).getReg();
2186  Register RHS = MI.getOperand(3).getReg();
2187 
2188  MIRBuilder.buildSub(Res, LHS, RHS);
2189  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
2190 
2191  MI.eraseFromParent();
2192  return Legalized;
2193  }
2194  case G_USUBE: {
// Subtract with borrow-in.  Borrow-out is borrow-in when LHS == RHS,
// otherwise LHS < RHS.
2195  Register Res = MI.getOperand(0).getReg();
2196  Register BorrowOut = MI.getOperand(1).getReg();
2197  Register LHS = MI.getOperand(2).getReg();
2198  Register RHS = MI.getOperand(3).getReg();
2199  Register BorrowIn = MI.getOperand(4).getReg();
2200 
2201  Register TmpRes = MRI.createGenericVirtualRegister(Ty);
2202  Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
2203  Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
2204  Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
2205 
2206  MIRBuilder.buildSub(TmpRes, LHS, RHS);
2207  MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
2208  MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
2209  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
2210  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
2211  MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
2212 
2213  MI.eraseFromParent();
2214  return Legalized;
2215  }
2216  case G_UITOFP:
2217  return lowerUITOFP(MI, TypeIdx, Ty);
2218  case G_SITOFP:
2219  return lowerSITOFP(MI, TypeIdx, Ty);
2220  case G_FPTOUI:
2221  return lowerFPTOUI(MI, TypeIdx, Ty);
2222  case G_SMIN:
2223  case G_SMAX:
2224  case G_UMIN:
2225  case G_UMAX:
2226  return lowerMinMax(MI, TypeIdx, Ty);
2227  case G_FCOPYSIGN:
2228  return lowerFCopySign(MI, TypeIdx, Ty);
2229  case G_FMINNUM:
2230  case G_FMAXNUM:
2231  return lowerFMinNumMaxNum(MI);
2232  case G_UNMERGE_VALUES:
2233  return lowerUnmergeValues(MI);
2234  case TargetOpcode::G_SEXT_INREG: {
// Sign-extend-in-register is lowered as shl by (width - SizeInBits)
// followed by an arithmetic shift right by the same amount.
2235  assert(MI.getOperand(2).isImm() && "Expected immediate");
2236  int64_t SizeInBits = MI.getOperand(2).getImm();
2237 
2238  Register DstReg = MI.getOperand(0).getReg();
2239  Register SrcReg = MI.getOperand(1).getReg();
2240  LLT DstTy = MRI.getType(DstReg);
2241  Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
2242 
2243  auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
2244  MIRBuilder.buildInstr(TargetOpcode::G_SHL, {TmpRes}, {SrcReg, MIBSz->getOperand(0).getReg()});
2245  MIRBuilder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {TmpRes, MIBSz->getOperand(0).getReg()});
2246  MI.eraseFromParent();
2247  return Legalized;
2248  }
2249  case G_SHUFFLE_VECTOR:
2250  return lowerShuffleVector(MI);
2251  case G_DYN_STACKALLOC:
2252  return lowerDynStackAlloc(MI);
2253  case G_EXTRACT:
2254  return lowerExtract(MI);
2255  case G_INSERT:
2256  return lowerInsert(MI);
2257  }
2258 }
2259 
// NOTE(review): the extraction dropped line 2260, which presumably held the
// return type and function name (upstream this is
// LegalizerHelper::fewerElementsVectorImplicitDef) -- confirm.  Splits the
// single def of MI into NumParts NarrowTy-sized undef pieces and reassembles
// the original register with G_CONCAT_VECTORS (vector pieces) or
// G_BUILD_VECTOR (scalar pieces).  Only even breakdowns are handled; uneven
// splits return UnableToLegalize.
2261  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
2262  SmallVector<Register, 2> DstRegs;
2263 
2264  unsigned NarrowSize = NarrowTy.getSizeInBits();
2265  Register DstReg = MI.getOperand(0).getReg();
2266  unsigned Size = MRI.getType(DstReg).getSizeInBits();
2267  int NumParts = Size / NarrowSize;
2268  // FIXME: Don't know how to handle the situation where the small vectors
2269  // aren't all the same size yet.
2270  if (Size % NarrowSize != 0)
2271  return UnableToLegalize;
2272 
2273  for (int i = 0; i < NumParts; ++i) {
2274  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
2275  MIRBuilder.buildUndef(TmpReg);
2276  DstRegs.push_back(TmpReg);
2277  }
2278 
2279  if (NarrowTy.isVector())
2280  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2281  else
2282  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2283 
2284  MI.eraseFromParent();
2285  return Legalized;
2286 }
2287 
// NOTE(review): the extraction dropped lines 2288-2289 (return type and the
// start of the signature; upstream this is
// LegalizerHelper::fewerElementsVectorBasic) -- confirm.  Breaks a
// same-type element-wise operation (1-3 source operands) into NarrowTy-sized
// pieces.  Two strategies: if NarrowSize*NumParts does not cover the full
// size, handle a single leftover *element* via an extract/op/insert chain;
// otherwise split all operands with extractParts and rebuild the result with
// concat/build_vector.
2290  LLT NarrowTy) {
2291  const unsigned Opc = MI.getOpcode();
2292  const unsigned NumOps = MI.getNumOperands() - 1;
2293  const unsigned NarrowSize = NarrowTy.getSizeInBits();
2294  const Register DstReg = MI.getOperand(0).getReg();
2295  const unsigned Flags = MI.getFlags();
2296  const LLT DstTy = MRI.getType(DstReg);
2297  const unsigned Size = DstTy.getSizeInBits();
2298  const int NumParts = Size / NarrowSize;
2299  const LLT EltTy = DstTy.getElementType();
2300  const unsigned EltSize = EltTy.getSizeInBits();
2301  const unsigned BitsForNumParts = NarrowSize * NumParts;
2302 
2303  // Check if we have any leftovers. If we do, then only handle the case where
2304  // the leftover is one element.
2305  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
2306  return UnableToLegalize;
2307 
2308  if (BitsForNumParts != Size) {
2309  Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
2310  MIRBuilder.buildUndef(AccumDstReg);
2311 
2312  // Handle the pieces which evenly divide into the requested type with
2313  // extract/op/insert sequence.
2314  for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
2315  SmallVector<SrcOp, 4> SrcOps;
2316  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
2317  Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
2318  MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
2319  SrcOps.push_back(PartOpReg);
2320  }
2321 
2322  Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
2323  MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
2324 
// The accumulator register is rebuilt after each insert so the final insert
// below writes directly into DstReg.
2325  Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
2326  MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
2327  AccumDstReg = PartInsertReg;
2328  }
2329 
2330  // Handle the remaining element sized leftover piece.
2331  SmallVector<SrcOp, 4> SrcOps;
2332  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
2333  Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
2334  MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
2335  BitsForNumParts);
2336  SrcOps.push_back(PartOpReg);
2337  }
2338 
2339  Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
2340  MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
2341  MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
2342  MI.eraseFromParent();
2343 
2344  return Legalized;
2345  }
2346 
// Even breakdown: split each present source operand into NumParts pieces.
2347  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
2348 
2349  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
2350 
2351  if (NumOps >= 2)
2352  extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
2353 
2354  if (NumOps >= 3)
2355  extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
2356 
2357  for (int i = 0; i < NumParts; ++i) {
2358  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
2359 
2360  if (NumOps == 1)
2361  MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
2362  else if (NumOps == 2) {
2363  MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
2364  } else if (NumOps == 3) {
2365  MIRBuilder.buildInstr(Opc, {DstReg},
2366  {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
2367  }
2368 
2369  DstRegs.push_back(DstReg);
2370  }
2371 
2372  if (NarrowTy.isVector())
2373  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2374  else
2375  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2376 
2377  MI.eraseFromParent();
2378  return Legalized;
2379 }
2380 
2381 // Handle splitting vector operations which need to have the same number of
2382 // elements in each type index, but each type index may have a different element
2383 // type.
2384 //
2385 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
2386 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2387 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2388 //
2389 // Also handles some irregular breakdown cases, e.g.
2390 // e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
2391 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2392 // s64 = G_SHL s64, s32
// NOTE(review): the extraction dropped lines 2393-2394 (return type and the
// start of the signature; upstream this is
// LegalizerHelper::fewerElementsVectorMultiEltType) -- confirm.
2395  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
2396  if (TypeIdx != 0)
2397  return UnableToLegalize;
2398 
2399  const LLT NarrowTy0 = NarrowTyArg;
2400  const unsigned NewNumElts =
2401  NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
2402 
2403  const Register DstReg = MI.getOperand(0).getReg();
2404  LLT DstTy = MRI.getType(DstReg);
2405  LLT LeftoverTy0;
2406 
2407  // All of the operands need to have the same number of elements, so if we can
2408  // determine a type breakdown for the result type, we can for all of the
2409  // source types.
2410  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
2411  if (NumParts < 0)
2412  return UnableToLegalize;
2413 
// NOTE(review): lines 2413-2414 were dropped by the extraction; "NewInsts"
// used below is otherwise undeclared -- presumably a
// SmallVector<MachineInstrBuilder, N> holding the not-yet-inserted part
// instructions.  Confirm against upstream.
2415 
2416  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
2417  SmallVector<Register, 4> PartRegs, LeftoverRegs;
2418 
2419  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
2420  LLT LeftoverTy;
2421  Register SrcReg = MI.getOperand(I).getReg();
2422  LLT SrcTyI = MRI.getType(SrcReg);
2423  LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
2424  LLT LeftoverTyI;
2425 
2426  // Split this operand into the requested typed registers, and any leftover
2427  // required to reproduce the original type.
2428  if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
2429  LeftoverRegs))
2430  return UnableToLegalize;
2431 
2432  if (I == 1) {
2433  // For the first operand, create an instruction for each part and setup
2434  // the result.
// NOTE(review): lines 2437 and 2445 were dropped; each presumably opened a
// NewInsts.push_back(...) whose .addDef/.addUse continuations survive below.
2435  for (Register PartReg : PartRegs) {
2436  Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2438  .addDef(PartDstReg)
2439  .addUse(PartReg));
2440  DstRegs.push_back(PartDstReg);
2441  }
2442 
2443  for (Register LeftoverReg : LeftoverRegs) {
2444  Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
2446  .addDef(PartDstReg)
2447  .addUse(LeftoverReg));
2448  LeftoverDstRegs.push_back(PartDstReg);
2449  }
2450  } else {
2451  assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
2452 
2453  // Add the newly created operand splits to the existing instructions. The
2454  // odd-sized pieces are ordered after the requested NarrowTyArg sized
2455  // pieces.
2456  unsigned InstCount = 0;
2457  for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
2458  NewInsts[InstCount++].addUse(PartRegs[J]);
2459  for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
2460  NewInsts[InstCount++].addUse(LeftoverRegs[J]);
2461  }
2462 
2463  PartRegs.clear();
2464  LeftoverRegs.clear();
2465  }
2466 
2467  // Insert the newly built operations and rebuild the result register.
2468  for (auto &MIB : NewInsts)
2469  MIRBuilder.insertInstr(MIB);
2470 
2471  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
2472 
2473  MI.eraseFromParent();
2474  return Legalized;
2475 }
2476 
// NOTE(review): the extraction dropped lines 2477-2478 (return type and the
// start of the signature; upstream this is
// LegalizerHelper::fewerElementsVectorCasts) -- confirm.  Splits a
// single-source conversion-style instruction on type index 0: the source is
// broken into NarrowTy1 pieces (matching element counts), one copy of the
// original opcode is emitted per piece, and the results are reassembled.
2479  LLT NarrowTy) {
2480  if (TypeIdx != 0)
2481  return UnableToLegalize;
2482 
2483  Register DstReg = MI.getOperand(0).getReg();
2484  Register SrcReg = MI.getOperand(1).getReg();
2485  LLT DstTy = MRI.getType(DstReg);
2486  LLT SrcTy = MRI.getType(SrcReg);
2487 
2488  LLT NarrowTy0 = NarrowTy;
2489  LLT NarrowTy1;
2490  unsigned NumParts;
2491 
2492  if (NarrowTy.isVector()) {
2493  // Uneven breakdown not handled.
2494  NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
2495  if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
2496  return UnableToLegalize;
2497 
2498  NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
2499  } else {
// Scalar narrow type: fully scalarize, one part per destination element.
2500  NumParts = DstTy.getNumElements();
2501  NarrowTy1 = SrcTy.getElementType();
2502  }
2503 
2504  SmallVector<Register, 4> SrcRegs, DstRegs;
2505  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
2506 
2507  for (unsigned I = 0; I < NumParts; ++I) {
2508  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2509  MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
2510  .addDef(DstReg)
2511  .addUse(SrcRegs[I]);
2512 
// Preserve e.g. fast-math flags from the original instruction.
2513  NewInst->setFlags(MI.getFlags());
2514  DstRegs.push_back(DstReg);
2515  }
2516 
2517  if (NarrowTy.isVector())
2518  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2519  else
2520  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2521 
2522  MI.eraseFromParent();
2523  return Legalized;
2524 }
2525 
// NOTE(review): the extraction dropped lines 2526-2527 (return type and the
// start of the signature; upstream this is
// LegalizerHelper::fewerElementsVectorCmp) -- confirm.  Splits a vector
// G_ICMP/G_FCMP into pieces.  TypeIdx 0 narrows the (boolean-vector) result
// type; TypeIdx 1 narrows the compared operand type; the "other" narrow type
// is derived so both sides keep matching element counts.
2528  LLT NarrowTy) {
2529  Register DstReg = MI.getOperand(0).getReg();
2530  Register Src0Reg = MI.getOperand(2).getReg();
2531  LLT DstTy = MRI.getType(DstReg);
2532  LLT SrcTy = MRI.getType(Src0Reg);
2533 
2534  unsigned NumParts;
2535  LLT NarrowTy0, NarrowTy1;
2536 
2537  if (TypeIdx == 0) {
2538  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
2539  unsigned OldElts = DstTy.getNumElements();
2540 
2541  NarrowTy0 = NarrowTy;
2542  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
2543  NarrowTy1 = NarrowTy.isVector() ?
2544  LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
2545  SrcTy.getElementType();
2546 
2547  } else {
2548  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
2549  unsigned OldElts = SrcTy.getNumElements();
2550 
2551  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
2552  NarrowTy.getNumElements();
2553  NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
2554  DstTy.getScalarSizeInBits());
2555  NarrowTy1 = NarrowTy;
2556  }
2557 
2558  // FIXME: Don't know how to handle the situation where the small vectors
2559  // aren't all the same size yet.
2560  if (NarrowTy1.isVector() &&
2561  NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
2562  return UnableToLegalize;
2563 
2564  CmpInst::Predicate Pred
2565  = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
2566 
2567  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
2568  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
2569  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
2570 
2571  for (unsigned I = 0; I < NumParts; ++I) {
2572  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2573  DstRegs.push_back(DstReg);
2574 
2575  if (MI.getOpcode() == TargetOpcode::G_ICMP)
2576  MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
2577  else {
// G_FCMP: carry over the original instruction's flags (e.g. fast-math).
2578  MachineInstr *NewCmp
2579  = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
2580  NewCmp->setFlags(MI.getFlags());
2581  }
2582  }
2583 
2584  if (NarrowTy1.isVector())
2585  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2586  else
2587  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2588 
2589  MI.eraseFromParent();
2590  return Legalized;
2591 }
2592 
// NOTE(review): the extraction dropped lines 2593-2594 (return type and the
// start of the signature; upstream this is
// LegalizerHelper::fewerElementsVectorSelect) -- confirm.  Splits a G_SELECT
// into per-part selects.  With a scalar condition the same condition register
// is reused for every part; with a vector condition it is split alongside the
// value operands.
2595  LLT NarrowTy) {
2596  Register DstReg = MI.getOperand(0).getReg();
2597  Register CondReg = MI.getOperand(1).getReg();
2598 
2599  unsigned NumParts = 0;
2600  LLT NarrowTy0, NarrowTy1;
2601 
2602  LLT DstTy = MRI.getType(DstReg);
2603  LLT CondTy = MRI.getType(CondReg);
2604  unsigned Size = DstTy.getSizeInBits();
2605 
2606  assert(TypeIdx == 0 || CondTy.isVector());
2607 
2608  if (TypeIdx == 0) {
2609  NarrowTy0 = NarrowTy;
2610  NarrowTy1 = CondTy;
2611 
2612  unsigned NarrowSize = NarrowTy0.getSizeInBits();
2613  // FIXME: Don't know how to handle the situation where the small vectors
2614  // aren't all the same size yet.
2615  if (Size % NarrowSize != 0)
2616  return UnableToLegalize;
2617 
2618  NumParts = Size / NarrowSize;
2619 
2620  // Need to break down the condition type
2621  if (CondTy.isVector()) {
2622  if (CondTy.getNumElements() == NumParts)
2623  NarrowTy1 = CondTy.getElementType();
2624  else
2625  NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
2626  CondTy.getScalarSizeInBits());
2627  }
2628  } else {
2629  NumParts = CondTy.getNumElements();
2630  if (NarrowTy.isVector()) {
2631  // TODO: Handle uneven breakdown.
2632  if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
2633  return UnableToLegalize;
2634 
// NOTE(review): this path bails out unconditionally -- the vector-condition
// NarrowTy breakdown for TypeIdx 1 is not implemented yet (see TODO above).
2635  return UnableToLegalize;
2636  } else {
2637  NarrowTy0 = DstTy.getElementType();
2638  NarrowTy1 = NarrowTy;
2639  }
2640  }
2641 
2642  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
2643  if (CondTy.isVector())
2644  extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
2645 
2646  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
2647  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
2648 
2649  for (unsigned i = 0; i < NumParts; ++i) {
2650  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2651  MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
2652  Src1Regs[i], Src2Regs[i]);
2653  DstRegs.push_back(DstReg);
2654  }
2655 
2656  if (NarrowTy0.isVector())
2657  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2658  else
2659  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2660 
2661  MI.eraseFromParent();
2662  return Legalized;
2663 }
2664 
// NOTE(review): the extraction dropped lines 2665-2666 (return type and the
// start of the signature; upstream this is
// LegalizerHelper::fewerElementsVectorPhi) -- confirm.  Splits a G_PHI: new
// narrow PHIs are created at the head of the result block, the original wide
// value is rebuilt from them after the PHI section, and each incoming value
// is split at the end of its predecessor block (before its terminator).
2667  LLT NarrowTy) {
2668  const Register DstReg = MI.getOperand(0).getReg();
2669  LLT PhiTy = MRI.getType(DstReg);
2670  LLT LeftoverTy;
2671 
2672  // All of the operands need to have the same number of elements, so if we can
2673  // determine a type breakdown for the result type, we can for all of the
2674  // source types.
2675  int NumParts, NumLeftover;
2676  std::tie(NumParts, NumLeftover)
2677  = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
2678  if (NumParts < 0)
2679  return UnableToLegalize;
2680 
2681  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
// NOTE(review): line 2682 was dropped by the extraction; "NewInsts" used
// below is otherwise undeclared -- presumably a
// SmallVector<MachineInstrBuilder, N> of the new PHIs.  Confirm upstream.
2683 
2684  const int TotalNumParts = NumParts + NumLeftover;
2685 
2686  // Insert the new phis in the result block first.
2687  for (int I = 0; I != TotalNumParts; ++I) {
2688  LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
2689  Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
2690  NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
2691  .addDef(PartDstReg));
2692  if (I < NumParts)
2693  DstRegs.push_back(PartDstReg);
2694  else
2695  LeftoverDstRegs.push_back(PartDstReg);
2696  }
2697 
// The merge of the narrow PHI results must come after all PHIs, so move the
// insert point to the first non-PHI instruction.
2698  MachineBasicBlock *MBB = MI.getParent();
2699  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
2700  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
2701 
2702  SmallVector<Register, 4> PartRegs, LeftoverRegs;
2703 
2704  // Insert code to extract the incoming values in each predecessor block.
2705  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
2706  PartRegs.clear();
2707  LeftoverRegs.clear();
2708 
2709  Register SrcReg = MI.getOperand(I).getReg();
2710  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2711  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2712 
2713  LLT Unused;
2714  if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
2715  LeftoverRegs))
2716  return UnableToLegalize;
2717 
2718  // Add the newly created operand splits to the existing instructions. The
2719  // odd-sized pieces are ordered after the requested NarrowTyArg sized
2720  // pieces.
2721  for (int J = 0; J != TotalNumParts; ++J) {
2722  MachineInstrBuilder MIB = NewInsts[J];
2723  MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
2724  MIB.addMBB(&OpMBB);
2725  }
2726  }
2727 
2728  MI.eraseFromParent();
2729  return Legalized;
2730 }
2731 
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
                                                  unsigned TypeIdx,
                                                  LLT NarrowTy) {
  // Legalize a G_UNMERGE_VALUES whose *source* type (TypeIdx 1) is too wide
  // by first unmerging the source into GCD-typed pieces, then unmerging each
  // piece into a contiguous run of the original destination registers.
  if (TypeIdx != 1)
    return UnableToLegalize;

  // Operands are: NumDst defs followed by the single source.
  const int NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  // TODO: Create sequence of extracts.
  if (DstTy == NarrowTy)
    return UnableToLegalize;

  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
  if (DstTy == GCDTy) {
    // This would just be a copy of the same unmerge.
    // TODO: Create extracts, pad with undef and create intermediate merges.
    return UnableToLegalize;
  }

  // First-level unmerge: split the source into GCD-typed registers.
  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  // Each GCD piece supplies this many of the original destinations.
  const int PartsPerUnmerge = NumDst / NumUnmerge;

  // Second-level unmerges: piece I defines destinations
  // [I * PartsPerUnmerge, (I + 1) * PartsPerUnmerge).
  for (int I = 0; I != NumUnmerge; ++I) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

    for (int J = 0; J != PartsPerUnmerge; ++J)
      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
    MIB.addUse(Unmerge.getReg(I));
  }

  MI.eraseFromParent();
  return Legalized;
}
2771 
2774  unsigned TypeIdx,
2775  LLT NarrowTy) {
2776  assert(TypeIdx == 0 && "not a vector type index");
2777  Register DstReg = MI.getOperand(0).getReg();
2778  LLT DstTy = MRI.getType(DstReg);
2779  LLT SrcTy = DstTy.getElementType();
2780 
2781  int DstNumElts = DstTy.getNumElements();
2782  int NarrowNumElts = NarrowTy.getNumElements();
2783  int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts;
2784  LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy);
2785 
2786  SmallVector<Register, 8> ConcatOps;
2787  SmallVector<Register, 8> SubBuildVector;
2788 
2789  Register UndefReg;
2790  if (WidenedDstTy != DstTy)
2791  UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0);
2792 
2793  // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as
2794  // necessary.
2795  //
2796  // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
2797  // -> <2 x s16>
2798  //
2799  // %4:_(s16) = G_IMPLICIT_DEF
2800  // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
2801  // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
2802  // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6
2803  // %3:_(<3 x s16>) = G_EXTRACT %7, 0
2804  for (int I = 0; I != NumConcat; ++I) {
2805  for (int J = 0; J != NarrowNumElts; ++J) {
2806  int SrcIdx = NarrowNumElts * I + J;
2807 
2808  if (SrcIdx < DstNumElts) {
2809  Register SrcReg = MI.getOperand(SrcIdx + 1).getReg();
2810  SubBuildVector.push_back(SrcReg);
2811  } else
2812  SubBuildVector.push_back(UndefReg);
2813  }
2814 
2815  auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector);
2816  ConcatOps.push_back(BuildVec.getReg(0));
2817  SubBuildVector.clear();
2818  }
2819 
2820  if (DstTy == WidenedDstTy)
2821  MIRBuilder.buildConcatVectors(DstReg, ConcatOps);
2822  else {
2823  auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps);
2824  MIRBuilder.buildExtract(DstReg, Concat, 0);
2825  }
2826 
2827  MI.eraseFromParent();
2828  return Legalized;
2829 }
2830 
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // Split a too-wide G_LOAD/G_STORE into a sequence of NarrowTy-sized memory
  // accesses (plus a leftover-sized access for uneven breakdowns), stitching
  // load results back together with insertParts.
  //
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
      MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return UnableToLegalize;

  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  Register ValReg = MI.getOperand(0).getReg();
  Register AddrReg = MI.getOperand(1).getReg();
  LLT ValTy = MRI.getType(ValReg);

  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    // For a load we only need the type breakdown; the part registers are
    // created as the individual loads are emitted below.
    std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    // For a store, split the value being stored up front.
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  // NumParts is still -1 if no satisfiable breakdown was found.
  if (NumParts == -1)
    return UnableToLegalize;

  // Offset arithmetic is done in the pointer's index width.
  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each elements
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         Offset += PartSize, ++Idx) {
      // NOTE(review): offsets are converted from bits to bytes here; assumes
      // PartSize and Offset are byte-aligned — TODO confirm for odd types.
      unsigned ByteSize = PartSize / 8;
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      // Compute the address of this piece: AddrReg + ByteOffset.
      MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      // Derive a memory operand describing the subrange of the original
      // access.
      MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
    }

    return Offset;
  };

  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  // For loads, recombine the pieces into the original wide value.
  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}
2917 
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  // Top-level dispatch for the FewerElements legalization action: route each
  // supported opcode to the helper that knows how to split it into NarrowTy
  // sized pieces. Unlisted opcodes are not yet handled.
  using namespace TargetOpcode;

  MIRBuilder.setInstr(MI);
  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  // Operations where every operand and the result share one vector type.
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FMAD:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FSQRT:
  case G_BSWAP:
  case G_BITREVERSE:
  case G_SDIV:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
    return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
  // Operations whose operands may have differing types (e.g. shift amount,
  // copysign magnitude/sign).
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
  // Conversions: result and source element types differ.
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
  case G_SELECT:
    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
  case G_PHI:
    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_BUILD_VECTOR:
    return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}
3015 
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy, const LLT AmtTy) {
  // Expand a G_SHL/G_LSHR/G_ASHR by a known-constant amount on a 2*HalfTy
  // value into operations on the two HalfTy halves. Since the amount is a
  // compile-time constant, each of the three size regimes (amount > width,
  // amount > half-width, amount == half-width, amount < half-width) can be
  // emitted directly without selects.

  // Split the input into low and high halves.
  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());

  // Shift by zero: just reassemble the input.
  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;

  SrcOp Lo(Register(0)), Hi(Register(0));
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      // Shifting out everything: result is zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Whole low half shifted past the boundary: only InL contributes,
      // shifted into the high half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      // Exact half-width shift: low half moves verbatim into the high half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      // General sub-half shift: Hi = (InH << Amt) | (InL >> (NVTBits - Amt)).
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
          MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
          NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      // Shifting out everything: result is zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Only the high half contributes, shifted down into the low half.
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      // Exact half-width shift: high half moves verbatim into the low half.
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      // General sub-half shift:
      // Lo = (InL >> Amt) | (InH << (NVTBits - Amt)), Hi = InH >> Amt.
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    // G_ASHR: as G_LSHR, but the vacated high bits are filled with the sign,
    // i.e. InH >> (NVTBits - 1).
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  // Reassemble the halves into the original-width result.
  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
  MI.eraseFromParent();

  return Legalized;
}
3103 
// TODO: Optimize if constant shift amount.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
                                   LLT RequestedTy) {
  // Narrow a scalar shift. For the shift-amount operand (TypeIdx 1) this is a
  // simple source narrowing; for the value operand the shift is expanded into
  // operations on two half-width registers.
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, RequestedTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Amt = MI.getOperand(2).getReg();
  LLT ShiftAmtTy = MRI.getType(Amt);
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned NewBitSize = DstEltSize / 2;
  const LLT HalfTy = LLT::scalar(NewBitSize);
  const LLT CondTy = LLT::scalar(1);

  // If the shift amount is a known constant, the cheaper select-free
  // expansion can be used.
  if (const MachineInstr *KShiftAmt =
          getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
    return narrowScalarShiftByConstant(
        MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
  }

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);

  // Split the input into low and high halves.
  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());

  // AmtExcess = Amt - NewBitSize (used when the shift crosses the halves),
  // AmtLack = NewBitSize - Amt (bits carried between halves otherwise).
  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);

  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
  // IsShort: the shift stays within a single half.
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
  // IsZero: a zero-amount shift must pass the input through unchanged.
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);

  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    auto Hi = MIRBuilder.buildSelect(
        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    MachineInstrBuilder HiL;
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
    }
    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
                                     {InH, AmtExcess}); // Lo from Hi part.

    auto Lo = MIRBuilder.buildSelect(
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));

    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  MIRBuilder.buildMerge(DstReg, ResultRegs);
  MI.eraseFromParent();
  return Legalized;
}
3214 
3217  LLT MoreTy) {
3218  assert(TypeIdx == 0 && "Expecting only Idx 0");
3219 
3220  Observer.changingInstr(MI);
3221  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3222  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3223  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3224  moreElementsVectorSrc(MI, MoreTy, I);
3225  }
3226 
3227  MachineBasicBlock &MBB = *MI.getParent();
3228  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3229  moreElementsVectorDst(MI, MoreTy, 0);
3230  Observer.changedInstr(MI);
3231  return Legalized;
3232 }
3233 
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  // Top-level dispatch for the MoreElements legalization action: pad vector
  // operands/results of the supported opcodes up to MoreTy.
  MIRBuilder.setInstr(MI);
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    // Only the result needs widening.
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE:
    if (TypeIdx != 0)
      return UnableToLegalize;
    // Only the stored value needs widening.
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX: {
    // Binary ops: widen both sources and the result to the same type.
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT:
    if (TypeIdx != 1)
      return UnableToLegalize;
    // Widen the source being extracted from; the result is unchanged.
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INSERT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    // Widen the vector being inserted into, and the result.
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    // Vector conditions (vselect) are not handled here.
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return UnableToLegalize;

    // Widen both select values and the result; the scalar condition stays.
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UNMERGE_VALUES: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    // Widen the source, then rebuild the unmerge with extra dead defs to
    // cover the padding elements.
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    int NumDst = MI.getNumOperands() - 1;
    moreElementsVectorSrc(MI, MoreTy, NumDst);

    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
    for (int I = 0; I != NumDst; ++I)
      MIB.addDef(MI.getOperand(I).getReg());

    int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
    for (int I = NumDst; I != NewNumDst; ++I)
      MIB.addDef(MRI.createGenericVirtualRegister(DstTy));

    MIB.addUse(MI.getOperand(NumDst).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  default:
    return UnableToLegalize;
  }
}
3323 
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  // Multi-precision (schoolbook) multiply: Src1Regs and Src2Regs hold the
  // NarrowTy-sized limbs of the two factors, least significant first.
  // DstRegs (pre-sized by the caller) receives the limbs of the product.
  // Each destination limb DstIdx sums:
  //   - the low halves of all limb products Src1[DstIdx-i] * Src2[i],
  //   - the high halves (G_UMULH) of the products that fed limb DstIdx-1,
  //   - the accumulated carries from limb DstIdx-1.
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  unsigned DstIdx = 0; // Low bits of the result.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  unsigned CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx.
    // The bounds clamp i so both Src1[DstIdx - i] and Src2[i] are in range.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Add CarrySum from additions calculated for previous DstIdx.
    if (DstIdx != 1) {
      Factors.push_back(CarrySumPrevDstIdx);
    }

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
    if (DstIdx != DstParts - 1) {
      // Use overflow-reporting adds so carries propagate to the next limb.
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Since value for the next index is not calculated, neither is CarrySum.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}
3386 
3389  Register DstReg = MI.getOperand(0).getReg();
3390  Register Src1 = MI.getOperand(1).getReg();
3391  Register Src2 = MI.getOperand(2).getReg();
3392 
3393  LLT Ty = MRI.getType(DstReg);
3394  if (Ty.isVector())
3395  return UnableToLegalize;
3396 
3397  unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
3398  unsigned DstSize = Ty.getSizeInBits();
3399  unsigned NarrowSize = NarrowTy.getSizeInBits();
3400  if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
3401  return UnableToLegalize;
3402 
3403  unsigned NumDstParts = DstSize / NarrowSize;
3404  unsigned NumSrcParts = SrcSize / NarrowSize;
3405  bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
3406  unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
3407 
3408  SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs;
3409  extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
3410  extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
3411  DstTmpRegs.resize(DstTmpParts);
3412  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
3413 
3414  // Take only high half of registers if this is high mul.
3415  ArrayRef<Register> DstRegs(
3416  IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
3417  MIRBuilder.buildMerge(DstReg, DstRegs);
3418  MI.eraseFromParent();
3419  return Legalized;
3420 }
3421 
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  // Narrow the *source* (TypeIdx 1) of a G_EXTRACT: split the source into
  // NarrowTy pieces, take (sub-)segments from each piece that overlaps the
  // extracted range, and merge them into the destination.
  if (TypeIdx != 1)
    return UnableToLegalize;

  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  // FIXME: add support for when SizeOp1 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;
  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  Register OpReg = MI.getOperand(0).getReg();
  // [OpStart, OpStart + OpSize) is the bit range being extracted.
  uint64_t OpStart = MI.getOperand(2).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned SrcStart = i * NarrowSize;

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // No part of the extract uses this subregister, ignore it.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is extracted, forward the value.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      // This piece lies entirely past the start of the extracted range.
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      // The extracted range starts inside this piece.
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  Register DstReg = MI.getOperand(0).getReg();
  // Recombine the collected segments into the destination.
  if(MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
3486 
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // Narrow the result (TypeIdx 0) of a G_INSERT: split the base value into
  // NarrowTy pieces, splice the relevant segment of the inserted value into
  // each piece the insert overlaps, and merge the pieces back together.
  //
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  // FIXME: add support for when SizeOp0 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp0 % NarrowSize != 0)
    return UnableToLegalize;

  int NumParts = SizeOp0 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  // Split the value being inserted into (operand 1).
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  Register OpReg = MI.getOperand(2).getReg();
  // [OpStart, OpStart + OpSize) is the bit range being overwritten.
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned DstStart = i * NarrowSize;

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      // The inserted value starts before this piece; drop its leading bits.
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      // The inserted value starts inside this piece.
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize =
          std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    // Splice the segment into this piece of the base value.
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
  Register DstReg = MI.getOperand(0).getReg();
  // Recombine the pieces into the full-width result.
  if(MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
3561 
3564  LLT NarrowTy) {
3565  Register DstReg = MI.getOperand(0).getReg();
3566  LLT DstTy = MRI.getType(DstReg);
3567 
3568  assert(MI.getNumOperands() == 3 && TypeIdx == 0);
3569 
3570  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
3571  SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
3572  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
3573  LLT LeftoverTy;
3574  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
3575  Src0Regs, Src0LeftoverRegs))
3576  return UnableToLegalize;
3577 
3578  LLT Unused;
3579  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
3580  Src1Regs, Src1LeftoverRegs))
3581  llvm_unreachable("inconsistent extractParts result");
3582 
3583  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
3584  auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
3585  {Src0Regs[I], Src1Regs[I]});
3586  DstRegs.push_back(Inst->getOperand(0).getReg());
3587  }
3588 
3589  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
3590  auto Inst = MIRBuilder.buildInstr(
3591  MI.getOpcode(),
3592  {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
3593  DstLeftoverRegs.push_back(Inst->getOperand(0).getReg());
3594  }
3595 
3596  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
3597  LeftoverTy, DstLeftoverRegs);
3598 
3599  MI.eraseFromParent();
3600  return Legalized;
3601 }
3602 
3605  LLT NarrowTy) {
3606  if (TypeIdx != 0)
3607  return UnableToLegalize;
3608 
3609  Register CondReg = MI.getOperand(1).getReg();
3610  LLT CondTy = MRI.getType(CondReg);
3611  if (CondTy.isVector()) // TODO: Handle vselect
3612  return UnableToLegalize;
3613 
3614  Register DstReg = MI.getOperand(0).getReg();
3615  LLT DstTy = MRI.getType(DstReg);
3616 
3617  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
3618  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
3619  SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
3620  LLT LeftoverTy;
3621  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
3622  Src1Regs, Src1LeftoverRegs))
3623  return UnableToLegalize;
3624 
3625  LLT Unused;
3626  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
3627  Src2Regs, Src2LeftoverRegs))
3628  llvm_unreachable("inconsistent extractParts result");
3629 
3630  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
3631  auto Select = MIRBuilder.buildSelect(NarrowTy,
3632  CondReg, Src1Regs[I], Src2Regs[I]);
3633  DstRegs.push_back(Select->getOperand(0).getReg());
3634  }
3635 
3636  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
3637  auto Select = MIRBuilder.buildSelect(
3638  LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
3639  DstLeftoverRegs.push_back(Select->getOperand(0).getReg());
3640  }
3641 
3642  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
3643  LeftoverTy, DstLeftoverRegs);
3644 
3645  MI.eraseFromParent();
3646  return Legalized;
3647 }
3648 
3651  unsigned Opc = MI.getOpcode();
3652  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
3653  auto isSupported = [this](const LegalityQuery &Q) {
3654  auto QAction = LI.getAction(Q).Action;
3655  return QAction == Legal || QAction == Libcall || QAction == Custom;
3656  };
3657  switch (Opc) {
3658  default:
3659  return UnableToLegalize;
3660  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
3661  // This trivially expands to CTLZ.
3662  Observer.changingInstr(MI);
3663  MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
3664  Observer.changedInstr(MI);
3665  return Legalized;
3666  }
3667  case TargetOpcode::G_CTLZ: {
3668  Register SrcReg = MI.getOperand(1).getReg();
3669  unsigned Len = Ty.getSizeInBits();
3670  if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
3671  // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
3672  auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
3673  {Ty}, {SrcReg});
3674  auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
3675  auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
3676  auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
3677  SrcReg, MIBZero);
3678  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
3679  MIBCtlzZU);
3680  MI.eraseFromParent();
3681  return Legalized;
3682  }
3683  // for now, we do this:
3684  // NewLen = NextPowerOf2(Len);
3685  // x = x | (x >> 1);
3686  // x = x | (x >> 2);
3687  // ...
3688  // x = x | (x >>16);
3689  // x = x | (x >>32); // for 64-bit input
3690  // Upto NewLen/2
3691  // return Len - popcount(x);
3692  //
3693  // Ref: "Hacker's Delight" by Henry Warren
3694  Register Op = SrcReg;
3695  unsigned NewLen = PowerOf2Ceil(Len);
3696  for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
3697  auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
3698  auto MIBOp = MIRBuilder.buildInstr(
3699  TargetOpcode::G_OR, {Ty},
3700  {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
3701  {Op, MIBShiftAmt})});
3702  Op = MIBOp->getOperand(0).getReg();
3703  }
3704  auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
3705  MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
3706  {MIRBuilder.buildConstant(Ty, Len), MIBPop});
3707  MI.eraseFromParent();
3708  return Legalized;
3709  }
3710  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
3711  // This trivially expands to CTTZ.
3712  Observer.changingInstr(MI);
3713  MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
3714  Observer.changedInstr(MI);
3715  return Legalized;
3716  }
3717  case TargetOpcode::G_CTTZ: {
3718  Register SrcReg = MI.getOperand(1).getReg();
3719  unsigned Len = Ty.getSizeInBits();
3720  if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
3721  // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
3722  // zero.
3723  auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
3724  {Ty}, {SrcReg});
3725  auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
3726  auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
3727  auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
3728  SrcReg, MIBZero);
3729  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
3730  MIBCttzZU);
3731  MI.eraseFromParent();
3732  return Legalized;
3733  }
3734  // for now, we use: { return popcount(~x & (x - 1)); }
3735  // unless the target has ctlz but not ctpop, in which case we use:
3736  // { return 32 - nlz(~x & (x-1)); }
3737  // Ref: "Hacker's Delight" by Henry Warren
3738  auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
3739  auto MIBNot =
3740  MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
3741  auto MIBTmp = MIRBuilder.buildInstr(
3742  TargetOpcode::G_AND, {Ty},
3743  {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
3744  {SrcReg, MIBCstNeg1})});
3745  if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
3746  isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
3747  auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
3749  TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
3750  {MIBCstLen,
3751  MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
3752  MI.eraseFromParent();
3753  return Legalized;
3754  }
3755  MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
3756  MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
3757  return Legalized;
3758  }
3759  }
3760 }
3761 
3762 // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
3763 // representation.
3766  Register Dst = MI.getOperand(0).getReg();
3767  Register Src = MI.getOperand(1).getReg();
3768  const LLT S64 = LLT::scalar(64);
3769  const LLT S32 = LLT::scalar(32);
3770  const LLT S1 = LLT::scalar(1);
3771 
3772  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
3773 
3774  // unsigned cul2f(ulong u) {
3775  // uint lz = clz(u);
3776  // uint e = (u != 0) ? 127U + 63U - lz : 0;
3777  // u = (u << lz) & 0x7fffffffffffffffUL;
3778  // ulong t = u & 0xffffffffffUL;
3779  // uint v = (e << 23) | (uint)(u >> 40);
3780  // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
3781  // return as_float(v + r);
3782  // }
3783 
3784  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
3785  auto Zero64 = MIRBuilder.buildConstant(S64, 0);
3786 
3787  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
3788 
3789  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
3790  auto Sub = MIRBuilder.buildSub(S32, K, LZ);
3791 
3792  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
3793  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
3794 
3795  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
3796  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
3797 
3798  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
3799 
3800  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
3801  auto T = MIRBuilder.buildAnd(S64, U, Mask1);
3802 
3803  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
3804  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
3805  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
3806 
3807  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
3808  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
3809  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
3810  auto One = MIRBuilder.buildConstant(S32, 1);
3811 
3812  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
3813  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
3814  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
3815  MIRBuilder.buildAdd(Dst, V, R);
3816 
3817  return Legalized;
3818 }
3819 
3821 LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3822  Register Dst = MI.getOperand(0).getReg();
3823  Register Src = MI.getOperand(1).getReg();
3824  LLT DstTy = MRI.getType(Dst);
3825  LLT SrcTy = MRI.getType(Src);
3826 
3827  if (SrcTy != LLT::scalar(64))
3828  return UnableToLegalize;
3829 
3830  if (DstTy == LLT::scalar(32)) {
3831  // TODO: SelectionDAG has several alternative expansions to port which may
3832  // be more reasonble depending on the available instructions. If a target
3833  // has sitofp, does not have CTLZ, or can efficiently use f64 as an
3834  // intermediate type, this is probably worse.
3835  return lowerU64ToF32BitOps(MI);
3836  }
3837 
3838  return UnableToLegalize;
3839 }
3840 
3842 LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3843  Register Dst = MI.getOperand(0).getReg();
3844  Register Src = MI.getOperand(1).getReg();
3845  LLT DstTy = MRI.getType(Dst);
3846  LLT SrcTy = MRI.getType(Src);
3847 
3848  const LLT S64 = LLT::scalar(64);
3849  const LLT S32 = LLT::scalar(32);
3850  const LLT S1 = LLT::scalar(1);
3851 
3852  if (SrcTy != S64)
3853  return UnableToLegalize;
3854 
3855  if (DstTy == S32) {
3856  // signed cl2f(long l) {
3857  // long s = l >> 63;
3858  // float r = cul2f((l + s) ^ s);
3859  // return s ? -r : r;
3860  // }
3861  Register L = Src;
3862  auto SignBit = MIRBuilder.buildConstant(S64, 63);
3863  auto S = MIRBuilder.buildAShr(S64, L, SignBit);
3864 
3865  auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
3866  auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
3867  auto R = MIRBuilder.buildUITOFP(S32, Xor);
3868 
3869  auto RNeg = MIRBuilder.buildFNeg(S32, R);
3870  auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
3871  MIRBuilder.buildConstant(S64, 0));
3872  MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
3873  return Legalized;
3874  }
3875 
3876  return UnableToLegalize;
3877 }
3878 
3880 LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3881  Register Dst = MI.getOperand(0).getReg();
3882  Register Src = MI.getOperand(1).getReg();
3883  LLT DstTy = MRI.getType(Dst);
3884  LLT SrcTy = MRI.getType(Src);
3885  const LLT S64 = LLT::scalar(64);
3886  const LLT S32 = LLT::scalar(32);
3887 
3888  if (SrcTy != S64 && SrcTy != S32)
3889  return UnableToLegalize;
3890  if (DstTy != S32 && DstTy != S64)
3891  return UnableToLegalize;
3892 
3893  // FPTOSI gives same result as FPTOUI for positive signed integers.
3894  // FPTOUI needs to deal with fp values that convert to unsigned integers
3895  // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
3896 
3897  APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
3898  APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
3899  : APFloat::IEEEdouble(),
3901  TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
3902 
3903  MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
3904 
3906  // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
3907  // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
3908  MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
3909  MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
3910  MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
3911  MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
3912 
3914  MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, DstTy, Src, Threshold);
3915  MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
3916 
3917  MI.eraseFromParent();
3918  return Legalized;
3919 }
3920 
3921 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
3922  switch (Opc) {
3923  case TargetOpcode::G_SMIN:
3924  return CmpInst::ICMP_SLT;
3925  case TargetOpcode::G_SMAX:
3926  return CmpInst::ICMP_SGT;
3927  case TargetOpcode::G_UMIN:
3928  return CmpInst::ICMP_ULT;
3929  case TargetOpcode::G_UMAX:
3930  return CmpInst::ICMP_UGT;
3931  default:
3932  llvm_unreachable("not in integer min/max");
3933  }
3934 }
3935 
3937 LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3938  Register Dst = MI.getOperand(0).getReg();
3939  Register Src0 = MI.getOperand(1).getReg();
3940  Register Src1 = MI.getOperand(2).getReg();
3941 
3942  const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
3943  LLT CmpType = MRI.getType(Dst).changeElementSize(1);
3944 
3945  auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
3946  MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
3947 
3948  MI.eraseFromParent();
3949  return Legalized;
3950 }
3951 
3954  Register Dst = MI.getOperand(0).getReg();
3955  Register Src0 = MI.getOperand(1).getReg();
3956  Register Src1 = MI.getOperand(2).getReg();
3957 
3958  const LLT Src0Ty = MRI.getType(Src0);
3959  const LLT Src1Ty = MRI.getType(Src1);
3960 
3961  const int Src0Size = Src0Ty.getScalarSizeInBits();
3962  const int Src1Size = Src1Ty.getScalarSizeInBits();
3963 
3964  auto SignBitMask = MIRBuilder.buildConstant(
3965  Src0Ty, APInt::getSignMask(Src0Size));
3966 
3967  auto NotSignBitMask = MIRBuilder.buildConstant(
3968  Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
3969 
3970  auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
3971  MachineInstr *Or;
3972 
3973  if (Src0Ty == Src1Ty) {
3974  auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask);
3975  Or = MIRBuilder.buildOr(Dst, And0, And1);
3976  } else if (Src0Size > Src1Size) {
3977  auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
3978  auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
3979  auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
3980  auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
3981  Or = MIRBuilder.buildOr(Dst, And0, And1);
3982  } else {
3983  auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
3984  auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
3985  auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
3986  auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
3987  Or = MIRBuilder.buildOr(Dst, And0, And1);
3988  }
3989 
3990  // Be careful about setting nsz/nnan/ninf on every instruction, since the
3991  // constants are a nan and -0.0, but the final result should preserve
3992  // everything.
3993  if (unsigned Flags = MI.getFlags())
3994  Or->setFlags(Flags);
3995 
3996  MI.eraseFromParent();
3997  return Legalized;
3998 }
3999 
4002  unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
4003  TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
4004 
4005  Register Dst = MI.getOperand(0).getReg();
4006  Register Src0 = MI.getOperand(1).getReg();
4007  Register Src1 = MI.getOperand(2).getReg();
4008  LLT Ty = MRI.getType(Dst);
4009 
4010  if (!MI.getFlag(MachineInstr::FmNoNans)) {
4011  // Insert canonicalizes if it's possible we need to quiet to get correct
4012  // sNaN behavior.
4013 
4014  // Note this must be done here, and not as an optimization combine in the
4015  // absence of a dedicate quiet-snan instruction as we're using an
4016  // omni-purpose G_FCANONICALIZE.
4017  if (!isKnownNeverSNaN(Src0, MRI))
4018  Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
4019 
4020  if (!isKnownNeverSNaN(Src1, MRI))
4021  Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
4022  }
4023 
4024  // If there are no nans, it's safe to simply replace this with the non-IEEE
4025  // version.
4026  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
4027  MI.eraseFromParent();
4028  return Legalized;
4029 }
4030 
4032  // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
4033  Register DstReg = MI.getOperand(0).getReg();
4034  LLT Ty = MRI.getType(DstReg);
4035  unsigned Flags = MI.getFlags();
4036 
4037  auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
4038  Flags);
4039  MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
4040  MI.eraseFromParent();
4041  return Legalized;
4042 }
4043 
4046  const unsigned NumDst = MI.getNumOperands() - 1;
4047  const Register SrcReg = MI.getOperand(NumDst).getReg();
4048  LLT SrcTy = MRI.getType(SrcReg);
4049 
4050  Register Dst0Reg = MI.getOperand(0).getReg();
4051  LLT DstTy = MRI.getType(Dst0Reg);
4052 
4053 
4054  // Expand scalarizing unmerge as bitcast to integer and shift.
4055  if (!DstTy.isVector() && SrcTy.isVector() &&
4056  SrcTy.getElementType() == DstTy) {
4057  LLT IntTy = LLT::scalar(SrcTy.getSizeInBits());
4058  Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0);
4059 
4060  MIRBuilder.buildTrunc(Dst0Reg, Cast);
4061 
4062  const unsigned DstSize = DstTy.getSizeInBits();
4063  unsigned Offset = DstSize;
4064  for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
4065  auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
4066  auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt);
4067  MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
4068  }
4069 
4070  MI.eraseFromParent();
4071  return Legalized;
4072  }
4073 
4074  return UnableToLegalize;
4075 }
4076 
4079  Register DstReg = MI.getOperand(0).getReg();
4080  Register Src0Reg = MI.getOperand(1).getReg();
4081  Register Src1Reg = MI.getOperand(2).getReg();
4082  LLT Src0Ty = MRI.getType(Src0Reg);
4083  LLT DstTy = MRI.getType(DstReg);
4084  LLT IdxTy = LLT::scalar(32);
4085 
4086  const Constant *ShufMask = MI.getOperand(3).getShuffleMask();
4087 
4089  ShuffleVectorInst::getShuffleMask(ShufMask, Mask);
4090 
4091  if (DstTy.isScalar()) {
4092  if (Src0Ty.isVector())
4093  return UnableToLegalize;
4094 
4095  // This is just a SELECT.
4096  assert(Mask.size() == 1 && "Expected a single mask element");
4097  Register Val;
4098  if (Mask[0] < 0 || Mask[0] > 1)
4099  Val = MIRBuilder.buildUndef(DstTy).getReg(0);
4100  else
4101  Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
4102  MIRBuilder.buildCopy(DstReg, Val);
4103  MI.eraseFromParent();
4104  return Legalized;
4105  }
4106 
4107  Register Undef;
4108  SmallVector<Register, 32> BuildVec;
4109  LLT EltTy = DstTy.getElementType();
4110 
4111  for (int Idx : Mask) {
4112  if (Idx < 0) {
4113  if (!Undef.isValid())
4114  Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
4115  BuildVec.push_back(Undef);
4116  continue;
4117  }
4118 
4119  if (Src0Ty.isScalar()) {
4120  BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
4121  } else {
4122  int NumElts = Src0Ty.getNumElements();
4123  Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
4124  int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
4125  auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
4126  auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
4127  BuildVec.push_back(Extract.getReg(0));
4128  }
4129  }
4130 
4131  MIRBuilder.buildBuildVector(DstReg, BuildVec);
4132  MI.eraseFromParent();
4133  return Legalized;
4134 }
4135 
4138  Register Dst = MI.getOperand(0).getReg();
4139  Register AllocSize = MI.getOperand(1).getReg();
4140  unsigned Align = MI.getOperand(2).getImm();
4141 
4142  const auto &MF = *MI.getMF();
4143  const auto &TLI = *MF.getSubtarget().getTargetLowering();
4144 
4145  LLT PtrTy = MRI.getType(Dst);
4146  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
4147 
4148  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
4149  auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
4150  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
4151 
4152  // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
4153  // have to generate an extra instruction to negate the alloc and then use
4154  // G_GEP to add the negative offset.
4155  auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
4156  if (Align) {
4157  APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true);
4158  AlignMask.negate();
4159  auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
4160  Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
4161  }
4162 
4163  SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
4164  MIRBuilder.buildCopy(SPReg, SPTmp);
4165  MIRBuilder.buildCopy(Dst, SPTmp);
4166 
4167  MI.eraseFromParent();
4168  return Legalized;
4169 }
4170 
4173  Register Dst = MI.getOperand(0).getReg();
4174  Register Src = MI.getOperand(1).getReg();
4175  unsigned Offset = MI.getOperand(2).getImm();
4176 
4177  LLT DstTy = MRI.getType(Dst);
4178  LLT SrcTy = MRI.getType(Src);
4179 
4180  if (DstTy.isScalar() &&
4181  (SrcTy.isScalar() ||
4182  (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
4183  LLT SrcIntTy = SrcTy;
4184  if (!SrcTy.isScalar()) {
4185  SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
4186  Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
4187  }
4188 
4189  if (Offset == 0)
4190  MIRBuilder.buildTrunc(Dst, Src);
4191  else {
4192  auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
4193  auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
4194  MIRBuilder.buildTrunc(Dst, Shr);
4195  }
4196 
4197  MI.eraseFromParent();
4198  return Legalized;
4199  }
4200 
4201  return UnableToLegalize;
4202 }
4203 
4205  Register Dst = MI.getOperand(0).getReg();
4206  Register Src = MI.getOperand(1).getReg();
4207  Register InsertSrc = MI.getOperand(2).getReg();
4208  uint64_t Offset = MI.getOperand(3).getImm();
4209 
4210  LLT DstTy = MRI.getType(Src);
4211  LLT InsertTy = MRI.getType(InsertSrc);
4212 
4213  if (InsertTy.isScalar() &&
4214  (DstTy.isScalar() ||
4215  (DstTy.isVector() && DstTy.getElementType() == InsertTy))) {
4216  LLT IntDstTy = DstTy;
4217  if (!DstTy.isScalar()) {
4218  IntDstTy = LLT::scalar(DstTy.getSizeInBits());
4219  Src = MIRBuilder.buildBitcast(IntDstTy, Src).getReg(0);
4220  }
4221 
4222  Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
4223  if (Offset != 0) {
4224  auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
4225  ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
4226  }
4227 
4228  APInt MaskVal = ~APInt::getBitsSet(DstTy.getSizeInBits(), Offset,
4229  InsertTy.getSizeInBits());
4230 
4231  auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
4232  auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
4233  auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
4234 
4235  MIRBuilder.buildBitcast(Dst, Or);
4236  MI.eraseFromParent();
4237  return Legalized;
4238  }
4239 
4240  return UnableToLegalize;
4241 }
4242 
4245  Register Dst0 = MI.getOperand(0).getReg();
4246  Register Dst1 = MI.getOperand(1).getReg();
4247  Register LHS = MI.getOperand(2).getReg();
4248  Register RHS = MI.getOperand(3).getReg();
4249  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
4250 
4251  LLT Ty = MRI.getType(Dst0);
4252  LLT BoolTy = MRI.getType(Dst1);
4253 
4254  if (IsAdd)
4255  MIRBuilder.buildAdd(Dst0, LHS, RHS);
4256  else
4257  MIRBuilder.buildSub(Dst0, LHS, RHS);
4258 
4259  // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
4260 
4261  auto Zero = MIRBuilder.buildConstant(Ty, 0);
4262 
4263  // For an addition, the result should be less than one of the operands (LHS)
4264  // if and only if the other operand (RHS) is negative, otherwise there will
4265  // be overflow.
4266  // For a subtraction, the result should be less than one of the operands
4267  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
4268  // otherwise there will be overflow.
4269  auto ResultLowerThanLHS =
4270  MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
4271  auto ConditionRHS = MIRBuilder.buildICmp(
4272  IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
4273 
4274  MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
4275  MI.eraseFromParent();
4276  return Legalized;
4277 }
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType)
uint64_t CallInst * C
LegalizeResult fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
const MachineInstrBuilder & add(const MachineOperand &MO) const
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
MachineOperand Callee
Destination of the call.
Definition: CallLowering.h:77
static Type * getDoubleTy(LLVMContext &C)
Definition: Type.cpp:169
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1571
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:888
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ...
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:561
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineBasicBlock * getMBB() const
LegalizeResult lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:288
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
MachineInstrBuilder buildInsert(Register Res, Register Src, Register Op, unsigned Index)
iterator begin() const
Definition: ArrayRef.h:136
Register getReg(unsigned Idx) const
Get the register for the operand index.
bool LoweredTailCall
True if the call was lowered as a tail call.
Definition: CallLowering.h:100
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
unsigned getScalarSizeInBits() const
LegalizeResult fewerElementsVectorMultiEltType(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a instruction with a vector type where each operand may have a different element type...
The operation should be implemented in terms of a wider scalar base-type.
Definition: LegalizerInfo.h:57
unsigned getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
void setFPImm(const ConstantFP *CFP)
LegalizeResult lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
AtomicOrdering getFailureOrdering() const
For cmpxchg atomic operations, return the atomic ordering requirements when store does not occur...
The LegalityQuery object bundles together all the information that&#39;s needed to decide whether a given...
bool isScalar() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LegalizeResult fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
unsigned Reg
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:647
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
Definition: LegalizerInfo.h:62
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit...
virtual const TargetLowering * getTargetLowering() const
The two locations do not alias at all.
Definition: AliasAnalysis.h:84
unsigned less than
Definition: InstrTypes.h:757
LLT getScalarType() const
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_OR Op0, Op1.
LLT getType(unsigned Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register...
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:865
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
Definition: DataLayout.h:375
static uint32_t Concat[]
bool isTailCall(const MachineInstr &MI) const override
F(f)
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:181
LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineInstrBuilder buildUAddo(const DstOp &Res, const DstOp &CarryOut, const SrcOp &Op0, const SrcOp &Op1)
Build and insert Res, CarryOut = G_UADDO Op0, Op1.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const
Return true if the attribute exists at the given index.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Optional< MachineInstrBuilder > materializeGEP(Register &Res, Register Op0, const LLT &ValueTy, uint64_t Value)
Materialize and insert Res = G_GEP Op0, (G_CONSTANT Value)
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition: Utils.h:167
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert `Res0, ...
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_FADD Op0, Op1.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type...
Definition: LegalizerInfo.h:52
bool isVector() const
void setMF(MachineFunction &MF)
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
A description of a memory reference used in the backend.
bool isSigned() const
Definition: InstrTypes.h:902
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions. ...
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
const HexagonInstrInfo * TII
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:168
const ConstantFP * getFPImm() const
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:413
MachineInstrBuilder buildUAdde(const DstOp &Res, const DstOp &CarryOut, const SrcOp &Op0, const SrcOp &Op1, const SrcOp &CarryIn)
Build and insert Res, CarryOut = G_UADDE Op0, Op1, CarryIn.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Res = COPY Op depending on the differing sizes of Res and Op.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:410
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
LLT getElementType() const
Returns the vector&#39;s element type. Only valid for vector types.
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args)
Helper function that creates the given libcall.
AtomicOrdering getOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FSUB Op0, Op1.
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
Definition: LegalizerInfo.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4483
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, Optional< unsigned > Flags=None)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
LegalizeResult lowerShuffleVector(MachineInstr &MI)
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don&#39;t insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
LegalizeResult lowerInsert(MachineInstr &MI)
uint64_t getSizeInBits() const
Return the size in bits of the memory reference.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
static bool isLibCallInTailPosition(MachineInstr &MI)
True if an instruction is in tail position in its caller.
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:137
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:223
void setReg(Register Reg)
Change the register this operand corresponds to.
virtual const TargetInstrInfo * getInstrInfo() const
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:158
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_SUB Op0, Op1.
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
void setChangeObserver(GISelChangeObserver &Observer)
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
bool isReturn(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:646
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
T greatestCommonDivisor(T A, T B)
Return the greatest common divisor of the values using Euclid&#39;s algorithm.
Definition: MathExtras.h:610
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
Abstract class that contains various methods for clients to notify about changes. ...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
static LLT scalarOrVector(uint16_t NumElements, LLT ScalarTy)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:465
LegalizeResult legalizeInstrStep(MachineInstr &MI)
Replace MI by a sequence of legal instructions that can implement the same operation.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Op0, Src0.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:263
virtual bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const
Return true if MI is either legal or has been legalized and false if not legal.
Helper class to build MachineInstr.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:587
MachineInstrBuilder buildUMulH(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
LegalizeResult reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:165
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
bool isValid() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:732
LegalizeResult lowerExtract(MachineInstr &MI)
void setImm(int64_t immVal)
virtual const CallLowering * getCallLowering() const
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI)
Create a libcall to memcpy et al.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getAddressSpace() const
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:448
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildGEP(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert Res = G_GEP Op0, Op1.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Some kind of error has occurred and we could not legalize this instruction.
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
R600 Clause Merge
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:672
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
Instruction was already legal and no change was made to the MachineFunction.
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition: Utils.cpp:319
size_t size() const
Definition: SmallVector.h:52
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:224
const Constant * getShuffleMask() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:970
signed greater than
Definition: InstrTypes.h:759
static Type * getFP128Ty(LLVMContext &C)
Definition: Type.cpp:173
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
static LLT getGCDType(LLT OrigTy, LLT TargetTy)
LegalizeResult lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
const APFloat & getValueAPF() const
Definition: Constants.h:302
SmallVector< ArgInfo, 8 > OrigArgs
List of descriptors of the arguments passed to the function.
Definition: CallLowering.h:83
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:554
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:167
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:244
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:155
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
LegalizeResult libcall(MachineInstr &MI)
Legalize an instruction by emiting a runtime library call instead.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:551
void setFlags(unsigned flags)
Definition: MachineInstr.h:305
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
MachineOperand class - Representation of each machine instruction operand.
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:746
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, Optional< unsigned > Flags=None)
Build and insert Res = G_FNEG Op0.
LegalizeResult lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Intrinsic::ID getIntrinsicID() const
iterator end() const
Definition: ArrayRef.h:137
unsigned getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
signed less than
Definition: InstrTypes.h:761
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
Promote Memory to Register
Definition: Mem2Reg.cpp:109
LegalizeResult fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a simple vector instruction where all operands are the same type by splitting into multiple ...
The target wants to do something special with this combination of operand and type.
Definition: LegalizerInfo.h:81
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target...
void negate()
Negate this APInt in place.
Definition: APInt.h:1499
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:653
LegalizeResult lowerFMad(MachineInstr &MI)
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:716
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...