X86FastISel.cpp (LLVM 9.0.0svn)
1 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the X86-specific support for the FastISel class. Much
10 // of the target-specific code is generated by tablegen in the file
11 // X86GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86CallingConv.h"
17 #include "X86InstrBuilder.h"
18 #include "X86InstrInfo.h"
19 #include "X86MachineFunctionInfo.h"
20 #include "X86RegisterInfo.h"
21 #include "X86Subtarget.h"
22 #include "X86TargetMachine.h"
24 #include "llvm/CodeGen/FastISel.h"
29 #include "llvm/IR/CallSite.h"
30 #include "llvm/IR/CallingConv.h"
31 #include "llvm/IR/DebugInfo.h"
32 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalAlias.h"
35 #include "llvm/IR/GlobalVariable.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicInst.h"
38 #include "llvm/IR/Operator.h"
39 #include "llvm/MC/MCAsmInfo.h"
40 #include "llvm/MC/MCSymbol.h"
43 using namespace llvm;
44 
45 namespace {
46 
47 class X86FastISel final : public FastISel {
48  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
49  /// make the right decision when generating code for different targets.
50  const X86Subtarget *Subtarget;
51 
52  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE and x87
53  /// floating point ops.
54  /// When SSE is available, use it for f32 operations.
55  /// When SSE2 is available, use it for f64 operations.
56  bool X86ScalarSSEf64;
57  bool X86ScalarSSEf32;
58 
59 public:
60  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
61  const TargetLibraryInfo *libInfo)
62  : FastISel(funcInfo, libInfo) {
63  Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
64  X86ScalarSSEf64 = Subtarget->hasSSE2();
65  X86ScalarSSEf32 = Subtarget->hasSSE1();
66  }
67 
68  bool fastSelectInstruction(const Instruction *I) override;
69 
70  /// The specified machine instr operand is a vreg, and that
71  /// vreg is being provided by the specified load instruction. If possible,
72  /// try to fold the load as an operand to the instruction, returning true
73  /// on success.
74  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
75  const LoadInst *LI) override;
76 
77  bool fastLowerArguments() override;
78  bool fastLowerCall(CallLoweringInfo &CLI) override;
79  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
80 
81 #include "X86GenFastISel.inc"
82 
83 private:
84  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
85  const DebugLoc &DL);
86 
87  bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
88  unsigned &ResultReg, unsigned Alignment = 1);
89 
90  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
91  MachineMemOperand *MMO = nullptr, bool Aligned = false);
92  bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
93  X86AddressMode &AM,
94  MachineMemOperand *MMO = nullptr, bool Aligned = false);
95 
96  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
97  unsigned &ResultReg);
98 
99  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
100  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
101 
102  bool X86SelectLoad(const Instruction *I);
103 
104  bool X86SelectStore(const Instruction *I);
105 
106  bool X86SelectRet(const Instruction *I);
107 
108  bool X86SelectCmp(const Instruction *I);
109 
110  bool X86SelectZExt(const Instruction *I);
111 
112  bool X86SelectSExt(const Instruction *I);
113 
114  bool X86SelectBranch(const Instruction *I);
115 
116  bool X86SelectShift(const Instruction *I);
117 
118  bool X86SelectDivRem(const Instruction *I);
119 
120  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
121 
122  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
123 
124  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
125 
126  bool X86SelectSelect(const Instruction *I);
127 
128  bool X86SelectTrunc(const Instruction *I);
129 
130  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
131  const TargetRegisterClass *RC);
132 
133  bool X86SelectFPExt(const Instruction *I);
134  bool X86SelectFPTrunc(const Instruction *I);
135  bool X86SelectSIToFP(const Instruction *I);
136  bool X86SelectUIToFP(const Instruction *I);
137  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);
138 
139  const X86InstrInfo *getInstrInfo() const {
140  return Subtarget->getInstrInfo();
141  }
142  const X86TargetMachine *getTargetMachine() const {
143  return static_cast<const X86TargetMachine *>(&TM);
144  }
145 
146  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
147 
148  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
149  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
150  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
151  unsigned fastMaterializeConstant(const Constant *C) override;
152 
153  unsigned fastMaterializeAlloca(const AllocaInst *C) override;
154 
155  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
156 
157  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
158  /// computed in an SSE register, not on the X87 floating point stack.
159  bool isScalarFPTypeInSSEReg(EVT VT) const {
160  return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
161  (VT == MVT::f32 && X86ScalarSSEf32); // f32 when SSE1 is available
162  }
163 
164  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
165 
166  bool IsMemcpySmall(uint64_t Len);
167 
168  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
169  X86AddressMode SrcAM, uint64_t Len);
170 
171  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
172  const Value *Cond);
173 
174  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
175  X86AddressMode &AM);
176 
177  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
178  const TargetRegisterClass *RC, unsigned Op0,
179  bool Op0IsKill, unsigned Op1, bool Op1IsKill,
180  unsigned Op2, bool Op2IsKill, unsigned Op3,
181  bool Op3IsKill);
182 };
183 
184 } // end anonymous namespace.
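// Illustration (sketch, not taken from this excerpt): a FastISel subclass
// like X86FastISel is normally exposed to the backend through a small
// factory. The definition below is assumed from the constructor above and
// from the usual X86::createFastISel pattern; the real one lives elsewhere
// in this file / in X86.h and may differ in detail.
//
//   FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
//                                 const TargetLibraryInfo *libInfo) {
//     return new X86FastISel(funcInfo, libInfo);
//   }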
185 
186 static std::pair<unsigned, bool>
187 getX86SSEConditionCode(CmpInst::Predicate Predicate) {
188  unsigned CC;
189  bool NeedSwap = false;
190 
191  // SSE Condition code mapping:
192  // 0 - EQ
193  // 1 - LT
194  // 2 - LE
195  // 3 - UNORD
196  // 4 - NEQ
197  // 5 - NLT
198  // 6 - NLE
199  // 7 - ORD
200  switch (Predicate) {
201  default: llvm_unreachable("Unexpected predicate");
202  case CmpInst::FCMP_OEQ: CC = 0; break;
203  case CmpInst::FCMP_OGT: NeedSwap = true; LLVM_FALLTHROUGH;
204  case CmpInst::FCMP_OLT: CC = 1; break;
205  case CmpInst::FCMP_OGE: NeedSwap = true; LLVM_FALLTHROUGH;
206  case CmpInst::FCMP_OLE: CC = 2; break;
207  case CmpInst::FCMP_UNO: CC = 3; break;
208  case CmpInst::FCMP_UNE: CC = 4; break;
209  case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH;
210  case CmpInst::FCMP_UGE: CC = 5; break;
211  case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
212  case CmpInst::FCMP_UGT: CC = 6; break;
213  case CmpInst::FCMP_ORD: CC = 7; break;
214  case CmpInst::FCMP_UEQ: CC = 8; break;
215  case CmpInst::FCMP_ONE: CC = 12; break;
216  }
217 
218  return std::make_pair(CC, NeedSwap);
219 }
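// Worked example of the mapping above (illustrative, derived from the switch
// itself): for "fcmp ogt %a, %b" the function returns {CC = 1 (LT),
// NeedSwap = true}, i.e. the caller swaps the operands and emits a
// "less-than" SSE compare of %b against %a. The values 8 and 12 returned for
// FCMP_UEQ / FCMP_ONE do not fit the 3-bit SSE immediate and are only
// encodable with the wider AVX compare predicates (EQ_UQ / NEQ_OQ); that
// last detail is background knowledge rather than something stated here.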
220 
221 /// Adds a complex addressing mode to the given machine instr builder.
222 /// Note that this will constrain the index register. If it's not possible to
223 /// constrain the given index register, then a new one will be created. The
224 /// IndexReg field of the addressing mode will be updated to match in this case.
225 const MachineInstrBuilder &
226 X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
227  X86AddressMode &AM) {
228  // First constrain the index register. It needs to be a GR64_NOSP.
229  AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
230  MIB->getNumOperands() +
231  X86::AddrIndexReg);
232  return ::addFullAddress(MIB, AM);
233 }
234 
235 /// Check if it is possible to fold the condition from the XALU intrinsic
236 /// into the user. The condition code will only be updated on success.
237 bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
238  const Value *Cond) {
239  if (!isa<ExtractValueInst>(Cond))
240  return false;
241 
242  const auto *EV = cast<ExtractValueInst>(Cond);
243  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
244  return false;
245 
246  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
247  MVT RetVT;
248  const Function *Callee = II->getCalledFunction();
249  Type *RetTy =
250  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
251  if (!isTypeLegal(RetTy, RetVT))
252  return false;
253 
254  if (RetVT != MVT::i32 && RetVT != MVT::i64)
255  return false;
256 
257  X86::CondCode TmpCC;
258  switch (II->getIntrinsicID()) {
259  default: return false;
260  case Intrinsic::sadd_with_overflow:
261  case Intrinsic::ssub_with_overflow:
262  case Intrinsic::smul_with_overflow:
263  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
264  case Intrinsic::uadd_with_overflow:
265  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
266  }
267 
268  // Check if both instructions are in the same basic block.
269  if (II->getParent() != I->getParent())
270  return false;
271 
272  // Make sure nothing is in the way.
273  BasicBlock::const_iterator Start(I);
274  BasicBlock::const_iterator End(II);
275  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
276  // We only expect extractvalue instructions between the intrinsic and the
277  // instruction to be selected.
278  if (!isa<ExtractValueInst>(Itr))
279  return false;
280 
281  // Check that the extractvalue operand comes from the intrinsic.
282  const auto *EVI = cast<ExtractValueInst>(Itr);
283  if (EVI->getAggregateOperand() != II)
284  return false;
285  }
286 
287  CC = TmpCC;
288  return true;
289 }
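// For illustration, the IR shape accepted by the folding above looks roughly
// like this (an assumed example, not quoted from a test):
//
//   %t   = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %v   = extractvalue { i32, i1 } %t, 0
//   %ovf = extractvalue { i32, i1 } %t, 1
//   br i1 %ovf, label %overflow, label %cont
//
// with nothing but extractvalues of %t between the intrinsic and the user,
// all in one basic block. On success CC becomes X86::COND_O here (X86::COND_B
// for the unsigned add/sub intrinsics), so the user can test EFLAGS directly.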
290 
291 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
292  EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
293  if (evt == MVT::Other || !evt.isSimple())
294  // Unhandled type. Halt "fast" selection and bail.
295  return false;
296 
297  VT = evt.getSimpleVT();
298  // For now, require SSE/SSE2 for performing floating-point operations,
299  // since x87 requires additional work.
300  if (VT == MVT::f64 && !X86ScalarSSEf64)
301  return false;
302  if (VT == MVT::f32 && !X86ScalarSSEf32)
303  return false;
304  // Similarly, no f80 support yet.
305  if (VT == MVT::f80)
306  return false;
307  // We only handle legal types. For example, on x86-32 the instruction
308  // selector contains all of the 64-bit instructions from x86-64,
309  // under the assumption that i64 won't be used if the target doesn't
310  // support it.
311  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
312 }
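// A few concrete outcomes of the check above (illustrative):
//   i1  -> accepted only when AllowI1 is true
//   f32 -> accepted only when SSE1 is available (no x87 handling here)
//   f80 -> always rejected
//   i64 -> accepted only where the target reports it legal (64-bit mode)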
313 
314 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
315 /// The address is given by the pre-computed X86 addressing mode AM.
316 /// Return true and the result register by reference if it is possible.
317 bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
318  MachineMemOperand *MMO, unsigned &ResultReg,
319  unsigned Alignment) {
320  bool HasSSE41 = Subtarget->hasSSE41();
321  bool HasAVX = Subtarget->hasAVX();
322  bool HasAVX2 = Subtarget->hasAVX2();
323  bool HasAVX512 = Subtarget->hasAVX512();
324  bool HasVLX = Subtarget->hasVLX();
325  bool IsNonTemporal = MMO && MMO->isNonTemporal();
326 
327  // Get opcode and regclass of the output for the given load instruction.
328  unsigned Opc = 0;
329  const TargetRegisterClass *RC = nullptr;
330  switch (VT.getSimpleVT().SimpleTy) {
331  default: return false;
332  case MVT::i1:
333  case MVT::i8:
334  Opc = X86::MOV8rm;
335  RC = &X86::GR8RegClass;
336  break;
337  case MVT::i16:
338  Opc = X86::MOV16rm;
339  RC = &X86::GR16RegClass;
340  break;
341  case MVT::i32:
342  Opc = X86::MOV32rm;
343  RC = &X86::GR32RegClass;
344  break;
345  case MVT::i64:
346  // Must be in x86-64 mode.
347  Opc = X86::MOV64rm;
348  RC = &X86::GR64RegClass;
349  break;
350  case MVT::f32:
351  if (X86ScalarSSEf32) {
352  Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
353  RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
354  } else {
355  Opc = X86::LD_Fp32m;
356  RC = &X86::RFP32RegClass;
357  }
358  break;
359  case MVT::f64:
360  if (X86ScalarSSEf64) {
361  Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
362  RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
363  } else {
364  Opc = X86::LD_Fp64m;
365  RC = &X86::RFP64RegClass;
366  }
367  break;
368  case MVT::f80:
369  // No f80 support yet.
370  return false;
371  case MVT::v4f32:
372  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
373  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
374  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
375  else if (Alignment >= 16)
376  Opc = HasVLX ? X86::VMOVAPSZ128rm :
377  HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
378  else
379  Opc = HasVLX ? X86::VMOVUPSZ128rm :
380  HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
381  RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
382  break;
383  case MVT::v2f64:
384  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
385  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
386  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
387  else if (Alignment >= 16)
388  Opc = HasVLX ? X86::VMOVAPDZ128rm :
389  HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
390  else
391  Opc = HasVLX ? X86::VMOVUPDZ128rm :
392  HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
393  RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
394  break;
395  case MVT::v4i32:
396  case MVT::v2i64:
397  case MVT::v8i16:
398  case MVT::v16i8:
399  if (IsNonTemporal && Alignment >= 16)
400  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
401  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
402  else if (Alignment >= 16)
403  Opc = HasVLX ? X86::VMOVDQA64Z128rm :
404  HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
405  else
406  Opc = HasVLX ? X86::VMOVDQU64Z128rm :
407  HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
408  RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
409  break;
410  case MVT::v8f32:
411  assert(HasAVX);
412  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
413  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
414  else if (IsNonTemporal && Alignment >= 16)
415  return false; // Force split for X86::VMOVNTDQArm
416  else if (Alignment >= 32)
417  Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
418  else
419  Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
420  RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
421  break;
422  case MVT::v4f64:
423  assert(HasAVX);
424  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
425  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
426  else if (IsNonTemporal && Alignment >= 16)
427  return false; // Force split for X86::VMOVNTDQArm
428  else if (Alignment >= 32)
429  Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
430  else
431  Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
432  RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
433  break;
434  case MVT::v8i32:
435  case MVT::v4i64:
436  case MVT::v16i16:
437  case MVT::v32i8:
438  assert(HasAVX);
439  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
440  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
441  else if (IsNonTemporal && Alignment >= 16)
442  return false; // Force split for X86::VMOVNTDQArm
443  else if (Alignment >= 32)
444  Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
445  else
446  Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
447  RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
448  break;
449  case MVT::v16f32:
450  assert(HasAVX512);
451  if (IsNonTemporal && Alignment >= 64)
452  Opc = X86::VMOVNTDQAZrm;
453  else
454  Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
455  RC = &X86::VR512RegClass;
456  break;
457  case MVT::v8f64:
458  assert(HasAVX512);
459  if (IsNonTemporal && Alignment >= 64)
460  Opc = X86::VMOVNTDQAZrm;
461  else
462  Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
463  RC = &X86::VR512RegClass;
464  break;
465  case MVT::v8i64:
466  case MVT::v16i32:
467  case MVT::v32i16:
468  case MVT::v64i8:
469  assert(HasAVX512);
470  // Note: There are a lot more choices based on type with AVX-512, but
471  // there's really no advantage when the load isn't masked.
472  if (IsNonTemporal && Alignment >= 64)
473  Opc = X86::VMOVNTDQAZrm;
474  else
475  Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
476  RC = &X86::VR512RegClass;
477  break;
478  }
479 
480  ResultReg = createResultReg(RC);
481  MachineInstrBuilder MIB =
482  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
483  addFullAddress(MIB, AM);
484  if (MMO)
485  MIB->addMemOperand(*FuncInfo.MF, MMO);
486  return true;
487 }
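// Illustrative opcode choices made by the switch above (not exhaustive):
//   i32 load                                          -> X86::MOV32rm
//   v4i32 load, 16-byte aligned, non-temporal, SSE4.1 -> X86::MOVNTDQArm
//   v4f32 load, 16-byte aligned, AVX (no VLX)         -> X86::VMOVAPSrm
//   v4f32 load, unaligned, AVX-512 + VLX              -> X86::VMOVUPSZ128rm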
488 
489 /// X86FastEmitStore - Emit a machine instruction to store a value Val of
490 /// type VT. The address is either pre-computed, consisting of a base ptr, Ptr,
491 /// and a displacement offset, or a GlobalAddress,
492 /// i.e. V. Return true if it is possible.
493 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
494  X86AddressMode &AM,
495  MachineMemOperand *MMO, bool Aligned) {
496  bool HasSSE1 = Subtarget->hasSSE1();
497  bool HasSSE2 = Subtarget->hasSSE2();
498  bool HasSSE4A = Subtarget->hasSSE4A();
499  bool HasAVX = Subtarget->hasAVX();
500  bool HasAVX512 = Subtarget->hasAVX512();
501  bool HasVLX = Subtarget->hasVLX();
502  bool IsNonTemporal = MMO && MMO->isNonTemporal();
503 
504  // Get opcode and regclass of the output for the given store instruction.
505  unsigned Opc = 0;
506  switch (VT.getSimpleVT().SimpleTy) {
507  case MVT::f80: // No f80 support yet.
508  default: return false;
509  case MVT::i1: {
510  // Mask out all but lowest bit.
511  unsigned AndResult = createResultReg(&X86::GR8RegClass);
512  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
513  TII.get(X86::AND8ri), AndResult)
514  .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
515  ValReg = AndResult;
516  LLVM_FALLTHROUGH; // handle i1 as i8.
517  }
518  case MVT::i8: Opc = X86::MOV8mr; break;
519  case MVT::i16: Opc = X86::MOV16mr; break;
520  case MVT::i32:
521  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
522  break;
523  case MVT::i64:
524  // Must be in x86-64 mode.
525  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
526  break;
527  case MVT::f32:
528  if (X86ScalarSSEf32) {
529  if (IsNonTemporal && HasSSE4A)
530  Opc = X86::MOVNTSS;
531  else
532  Opc = HasAVX512 ? X86::VMOVSSZmr :
533  HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
534  } else
535  Opc = X86::ST_Fp32m;
536  break;
537  case MVT::f64:
538  if (X86ScalarSSEf64) {
539  if (IsNonTemporal && HasSSE4A)
540  Opc = X86::MOVNTSD;
541  else
542  Opc = HasAVX512 ? X86::VMOVSDZmr :
543  HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
544  } else
545  Opc = X86::ST_Fp64m;
546  break;
547  case MVT::x86mmx:
548  Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
549  break;
550  case MVT::v4f32:
551  if (Aligned) {
552  if (IsNonTemporal)
553  Opc = HasVLX ? X86::VMOVNTPSZ128mr :
554  HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
555  else
556  Opc = HasVLX ? X86::VMOVAPSZ128mr :
557  HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
558  } else
559  Opc = HasVLX ? X86::VMOVUPSZ128mr :
560  HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
561  break;
562  case MVT::v2f64:
563  if (Aligned) {
564  if (IsNonTemporal)
565  Opc = HasVLX ? X86::VMOVNTPDZ128mr :
566  HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
567  else
568  Opc = HasVLX ? X86::VMOVAPDZ128mr :
569  HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
570  } else
571  Opc = HasVLX ? X86::VMOVUPDZ128mr :
572  HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
573  break;
574  case MVT::v4i32:
575  case MVT::v2i64:
576  case MVT::v8i16:
577  case MVT::v16i8:
578  if (Aligned) {
579  if (IsNonTemporal)
580  Opc = HasVLX ? X86::VMOVNTDQZ128mr :
581  HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
582  else
583  Opc = HasVLX ? X86::VMOVDQA64Z128mr :
584  HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
585  } else
586  Opc = HasVLX ? X86::VMOVDQU64Z128mr :
587  HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
588  break;
589  case MVT::v8f32:
590  assert(HasAVX);
591  if (Aligned) {
592  if (IsNonTemporal)
593  Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
594  else
595  Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
596  } else
597  Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
598  break;
599  case MVT::v4f64:
600  assert(HasAVX);
601  if (Aligned) {
602  if (IsNonTemporal)
603  Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
604  else
605  Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
606  } else
607  Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
608  break;
609  case MVT::v8i32:
610  case MVT::v4i64:
611  case MVT::v16i16:
612  case MVT::v32i8:
613  assert(HasAVX);
614  if (Aligned) {
615  if (IsNonTemporal)
616  Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
617  else
618  Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
619  } else
620  Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
621  break;
622  case MVT::v16f32:
623  assert(HasAVX512);
624  if (Aligned)
625  Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
626  else
627  Opc = X86::VMOVUPSZmr;
628  break;
629  case MVT::v8f64:
630  assert(HasAVX512);
631  if (Aligned) {
632  Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
633  } else
634  Opc = X86::VMOVUPDZmr;
635  break;
636  case MVT::v8i64:
637  case MVT::v16i32:
638  case MVT::v32i16:
639  case MVT::v64i8:
640  assert(HasAVX512);
641  // Note: There are a lot more choices based on type with AVX-512, but
642  // there's really no advantage when the store isn't masked.
643  if (Aligned)
644  Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
645  else
646  Opc = X86::VMOVDQU64Zmr;
647  break;
648  }
649 
650  const MCInstrDesc &Desc = TII.get(Opc);
651  // Some of the instructions in the previous switch use FR128 instead
652  // of FR32 for ValReg. Make sure the register we feed the instruction
653  // matches its register class constraints.
654  // Note: It is fine to copy from FR32 to FR128; these are the same
655  // registers behind the scenes, which is why this never triggered
656  // any bugs before.
657  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
658  MachineInstrBuilder MIB =
659  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
660  addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
661  if (MMO)
662  MIB->addMemOperand(*FuncInfo.MF, MMO);
663 
664  return true;
665 }
666 
667 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
668  X86AddressMode &AM,
669  MachineMemOperand *MMO, bool Aligned) {
670  // Handle 'null' like i32/i64 0.
671  if (isa<ConstantPointerNull>(Val))
672  Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
673 
674  // If this is a store of a simple constant, fold the constant into the store.
675  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
676  unsigned Opc = 0;
677  bool Signed = true;
678  switch (VT.getSimpleVT().SimpleTy) {
679  default: break;
680  case MVT::i1:
681  Signed = false;
682  LLVM_FALLTHROUGH; // Handle as i8.
683  case MVT::i8: Opc = X86::MOV8mi; break;
684  case MVT::i16: Opc = X86::MOV16mi; break;
685  case MVT::i32: Opc = X86::MOV32mi; break;
686  case MVT::i64:
687  // Must be a 32-bit sign extended value.
688  if (isInt<32>(CI->getSExtValue()))
689  Opc = X86::MOV64mi32;
690  break;
691  }
692 
693  if (Opc) {
694  MachineInstrBuilder MIB =
695  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
696  addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
697  : CI->getZExtValue());
698  if (MMO)
699  MIB->addMemOperand(*FuncInfo.MF, MMO);
700  return true;
701  }
702  }
703 
704  unsigned ValReg = getRegForValue(Val);
705  if (ValReg == 0)
706  return false;
707 
708  bool ValKill = hasTrivialKill(Val);
709  return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
710 }
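// Constant-folding behaviour of the store path above, by example (sketch):
//   store i32 42, i32* %p            -> MOV32mi with immediate 42
//   store i64 -1, i64* %p            -> MOV64mi32 (fits a sign-extended imm32)
//   store i64 1099511627776, i64* %p ; 2^40: does not fit a sign-extended
//                                      imm32, so the value is materialized
//                                      into a register and the register form
//                                      of the store is used instead
//   store i1 true, i1* %p            -> treated as an i8 store of the bit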
711 
712 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
713 /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
714 /// ISD::SIGN_EXTEND).
715 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
716  unsigned Src, EVT SrcVT,
717  unsigned &ResultReg) {
718  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
719  Src, /*TODO: Kill=*/false);
720  if (RR == 0)
721  return false;
722 
723  ResultReg = RR;
724  return true;
725 }
726 
727 bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
728  // Handle constant address.
729  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
730  // Can't handle alternate code models yet.
731  if (TM.getCodeModel() != CodeModel::Small)
732  return false;
733 
734  // Can't handle TLS yet.
735  if (GV->isThreadLocal())
736  return false;
737 
738  // Can't handle !absolute_symbol references yet.
739  if (GV->isAbsoluteSymbolRef())
740  return false;
741 
742  // RIP-relative addresses can't have additional register operands, so if
743  // we've already folded stuff into the addressing mode, just force the
744  // global value into its own register, which we can use as the basereg.
745  if (!Subtarget->isPICStyleRIPRel() ||
746  (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
747  // Okay, we've committed to selecting this global. Set up the address.
748  AM.GV = GV;
749 
750  // Allow the subtarget to classify the global.
751  unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
752 
753  // If this reference is relative to the pic base, set it now.
754  if (isGlobalRelativeToPICBase(GVFlags)) {
755  // FIXME: How do we know Base.Reg is free??
756  AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
757  }
758 
759  // Unless the ABI requires an extra load, return a direct reference to
760  // the global.
761  if (!isGlobalStubReference(GVFlags)) {
762  if (Subtarget->isPICStyleRIPRel()) {
763  // Use rip-relative addressing if we can. Above we verified that the
764  // base and index registers are unused.
765  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
766  AM.Base.Reg = X86::RIP;
767  }
768  AM.GVOpFlags = GVFlags;
769  return true;
770  }
771 
772  // Ok, we need to do a load from a stub. If we've already loaded from
773  // this stub, reuse the loaded pointer, otherwise emit the load now.
774  DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
775  unsigned LoadReg;
776  if (I != LocalValueMap.end() && I->second != 0) {
777  LoadReg = I->second;
778  } else {
779  // Issue load from stub.
780  unsigned Opc = 0;
781  const TargetRegisterClass *RC = nullptr;
782  X86AddressMode StubAM;
783  StubAM.Base.Reg = AM.Base.Reg;
784  StubAM.GV = GV;
785  StubAM.GVOpFlags = GVFlags;
786 
787  // Prepare for inserting code in the local-value area.
788  SavePoint SaveInsertPt = enterLocalValueArea();
789 
790  if (TLI.getPointerTy(DL) == MVT::i64) {
791  Opc = X86::MOV64rm;
792  RC = &X86::GR64RegClass;
793 
794  if (Subtarget->isPICStyleRIPRel())
795  StubAM.Base.Reg = X86::RIP;
796  } else {
797  Opc = X86::MOV32rm;
798  RC = &X86::GR32RegClass;
799  }
800 
801  LoadReg = createResultReg(RC);
802  MachineInstrBuilder LoadMI =
803  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
804  addFullAddress(LoadMI, StubAM);
805 
806  // Ok, back to normal mode.
807  leaveLocalValueArea(SaveInsertPt);
808 
809  // Prevent loading GV stub multiple times in same MBB.
810  LocalValueMap[V] = LoadReg;
811  }
812 
813  // Now construct the final address. Note that the Disp, Scale,
814  // and Index values may already be set here.
815  AM.Base.Reg = LoadReg;
816  AM.GV = nullptr;
817  return true;
818  }
819  }
820 
821  // If all else fails, try to materialize the value in a register.
822  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
823  if (AM.Base.Reg == 0) {
824  AM.Base.Reg = getRegForValue(V);
825  return AM.Base.Reg != 0;
826  }
827  if (AM.IndexReg == 0) {
828  assert(AM.Scale == 1 && "Scale with no index!");
829  AM.IndexReg = getRegForValue(V);
830  return AM.IndexReg != 0;
831  }
832  }
833 
834  return false;
835 }
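// Example of the stub path above (illustrative; the relocation spelling is
// assumed): for a global that isGlobalStubReference reports as needing a
// GOT-style indirection, a pointer-sized load such as
//
//   movq foo@GOTPCREL(%rip), %reg      ; MOV64rm in 64-bit RIP-relative PIC
//
// is emitted once in the local-value area, cached in LocalValueMap so it is
// not reloaded in the same MBB, and the loaded register becomes AM.Base.Reg
// for the final memory operand.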
836 
837 /// X86SelectAddress - Attempt to fill in an address from the given value.
838 ///
839 bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
840  SmallVector<const Value *, 32> GEPs;
841 redo_gep:
842  const User *U = nullptr;
843  unsigned Opcode = Instruction::UserOp1;
844  if (const Instruction *I = dyn_cast<Instruction>(V)) {
845  // Don't walk into other basic blocks; it's possible we haven't
846  // visited them yet, so the instructions may not yet be assigned
847  // virtual registers.
848  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
849  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
850  Opcode = I->getOpcode();
851  U = I;
852  }
853  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
854  Opcode = C->getOpcode();
855  U = C;
856  }
857 
858  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
859  if (Ty->getAddressSpace() > 255)
860  // Fast instruction selection doesn't support the special
861  // address spaces.
862  return false;
863 
864  switch (Opcode) {
865  default: break;
866  case Instruction::BitCast:
867  // Look past bitcasts.
868  return X86SelectAddress(U->getOperand(0), AM);
869 
870  case Instruction::IntToPtr:
871  // Look past no-op inttoptrs.
872  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
873  TLI.getPointerTy(DL))
874  return X86SelectAddress(U->getOperand(0), AM);
875  break;
876 
877  case Instruction::PtrToInt:
878  // Look past no-op ptrtoints.
879  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
880  return X86SelectAddress(U->getOperand(0), AM);
881  break;
882 
883  case Instruction::Alloca: {
884  // Do static allocas.
885  const AllocaInst *A = cast<AllocaInst>(V);
886  DenseMap<const AllocaInst *, int>::iterator SI =
887  FuncInfo.StaticAllocaMap.find(A);
888  if (SI != FuncInfo.StaticAllocaMap.end()) {
889  AM.BaseType = X86AddressMode::FrameIndexBase;
890  AM.Base.FrameIndex = SI->second;
891  return true;
892  }
893  break;
894  }
895 
896  case Instruction::Add: {
897  // Adds of constants are common and easy enough.
898  if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
899  uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
900  // They have to fit in the 32-bit signed displacement field though.
901  if (isInt<32>(Disp)) {
902  AM.Disp = (uint32_t)Disp;
903  return X86SelectAddress(U->getOperand(0), AM);
904  }
905  }
906  break;
907  }
908 
909  case Instruction::GetElementPtr: {
910  X86AddressMode SavedAM = AM;
911 
912  // Pattern-match simple GEPs.
913  uint64_t Disp = (int32_t)AM.Disp;
914  unsigned IndexReg = AM.IndexReg;
915  unsigned Scale = AM.Scale;
916  gep_type_iterator GTI = gep_type_begin(U);
917  // Iterate through the indices, folding what we can. Constants can be
918  // folded, and one dynamic index can be handled, if the scale is supported.
919  for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
920  i != e; ++i, ++GTI) {
921  const Value *Op = *i;
922  if (StructType *STy = GTI.getStructTypeOrNull()) {
923  const StructLayout *SL = DL.getStructLayout(STy);
924  Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
925  continue;
926  }
927 
928  // An array/variable index is always of the form i*S where S is the
929  // constant scale size. See if we can push the scale into immediates.
930  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
931  for (;;) {
932  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
933  // Constant-offset addressing.
934  Disp += CI->getSExtValue() * S;
935  break;
936  }
937  if (canFoldAddIntoGEP(U, Op)) {
938  // A compatible add with a constant operand. Fold the constant.
939  ConstantInt *CI =
940  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
941  Disp += CI->getSExtValue() * S;
942  // Iterate on the other operand.
943  Op = cast<AddOperator>(Op)->getOperand(0);
944  continue;
945  }
946  if (IndexReg == 0 &&
947  (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
948  (S == 1 || S == 2 || S == 4 || S == 8)) {
949  // Scaled-index addressing.
950  Scale = S;
951  IndexReg = getRegForGEPIndex(Op).first;
952  if (IndexReg == 0)
953  return false;
954  break;
955  }
956  // Unsupported.
957  goto unsupported_gep;
958  }
959  }
960 
961  // Check for displacement overflow.
962  if (!isInt<32>(Disp))
963  break;
964 
965  AM.IndexReg = IndexReg;
966  AM.Scale = Scale;
967  AM.Disp = (uint32_t)Disp;
968  GEPs.push_back(V);
969 
970  if (const GetElementPtrInst *GEP =
971  dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
972  // Ok, the GEP indices were covered by constant-offset and scaled-index
973  // addressing. Update the address state and move on to examining the base.
974  V = GEP;
975  goto redo_gep;
976  } else if (X86SelectAddress(U->getOperand(0), AM)) {
977  return true;
978  }
979 
980  // If we couldn't merge the gep value into this addr mode, revert back to
981  // our address and just match the value instead of completely failing.
982  AM = SavedAM;
983 
984  for (const Value *I : reverse(GEPs))
985  if (handleConstantAddresses(I, AM))
986  return true;
987 
988  return false;
989  unsupported_gep:
990  // Ok, the GEP indices weren't all covered.
991  break;
992  }
993  }
994 
995  return handleConstantAddresses(V, AM);
996 }
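// A worked example of the GEP folding above (assumed IR, for illustration):
//
//   %p = getelementptr inbounds [16 x i32], [16 x i32]* %arr, i64 0, i64 %i
//
// folds to Base = vreg(%arr), IndexReg = vreg(%i), Scale = 4, Disp = 0, since
// the element size 4 is one of the supported scales {1, 2, 4, 8}. A GEP whose
// indices are all constants folds entirely into the signed 32-bit Disp field.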
997 
998 /// X86SelectCallAddress - Attempt to fill in an address from the given value.
999 ///
1000 bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
1001  const User *U = nullptr;
1002  unsigned Opcode = Instruction::UserOp1;
1003  const Instruction *I = dyn_cast<Instruction>(V);
1004  // Record if the value is defined in the same basic block.
1005  //
1006  // This information is crucial to know whether or not folding an
1007  // operand is valid.
1008  // Indeed, FastISel generates or reuses a virtual register for all
1009  // operands of all instructions it selects. Obviously, the definition and
1010  // its uses must use the same virtual register otherwise the produced
1011  // code is incorrect.
1012  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
1013  // registers for values that are alive across basic blocks. This ensures
1014  // that the values are consistently set across basic blocks, even
1015  // if different instruction selection mechanisms are used (e.g., a mix of
1016  // SDISel and FastISel).
1017  // For values local to a basic block, the instruction selection process
1018  // generates these virtual registers with whatever method is appropriate
1019  // for its needs. In particular, FastISel and SDISel do not share the way
1020  // local virtual registers are set.
1021  // Therefore, it is impossible (or at least unsafe) to share values
1022  // between basic blocks unless they use the same instruction selection
1023  // method, which is not guaranteed for X86.
1024  // Moreover, things like hasOneUse could not be used accurately if we
1025  // allowed referencing values across basic blocks when they are not
1026  // initially alive across basic blocks.
1027  bool InMBB = true;
1028  if (I) {
1029  Opcode = I->getOpcode();
1030  U = I;
1031  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
1032  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
1033  Opcode = C->getOpcode();
1034  U = C;
1035  }
1036 
1037  switch (Opcode) {
1038  default: break;
1039  case Instruction::BitCast:
1040  // Look past bitcasts if its operand is in the same BB.
1041  if (InMBB)
1042  return X86SelectCallAddress(U->getOperand(0), AM);
1043  break;
1044 
1045  case Instruction::IntToPtr:
1046  // Look past no-op inttoptrs if its operand is in the same BB.
1047  if (InMBB &&
1048  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
1049  TLI.getPointerTy(DL))
1050  return X86SelectCallAddress(U->getOperand(0), AM);
1051  break;
1052 
1053  case Instruction::PtrToInt:
1054  // Look past no-op ptrtoints if its operand is in the same BB.
1055  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
1056  return X86SelectCallAddress(U->getOperand(0), AM);
1057  break;
1058  }
1059 
1060  // Handle constant address.
1061  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
1062  // Can't handle alternate code models yet.
1063  if (TM.getCodeModel() != CodeModel::Small)
1064  return false;
1065 
1066  // RIP-relative addresses can't have additional register operands.
1067  if (Subtarget->isPICStyleRIPRel() &&
1068  (AM.Base.Reg != 0 || AM.IndexReg != 0))
1069  return false;
1070 
1071  // Can't handle TLS.
1072  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
1073  if (GVar->isThreadLocal())
1074  return false;
1075 
1076  // Okay, we've committed to selecting this global. Set up the basic address.
1077  AM.GV = GV;
1078 
1079  // Return a direct reference to the global. Fastisel can handle calls to
1080  // functions that require loads, such as dllimport and nonlazybind
1081  // functions.
1082  if (Subtarget->isPICStyleRIPRel()) {
1083  // Use rip-relative addressing if we can. Above we verified that the
1084  // base and index registers are unused.
1085  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
1086  AM.Base.Reg = X86::RIP;
1087  } else {
1088  AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
1089  }
1090 
1091  return true;
1092  }
1093 
1094  // If all else fails, try to materialize the value in a register.
1095  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
1096  if (AM.Base.Reg == 0) {
1097  AM.Base.Reg = getRegForValue(V);
1098  return AM.Base.Reg != 0;
1099  }
1100  if (AM.IndexReg == 0) {
1101  assert(AM.Scale == 1 && "Scale with no index!");
1102  AM.IndexReg = getRegForValue(V);
1103  return AM.IndexReg != 0;
1104  }
1105  }
1106 
1107  return false;
1108 }
1109 
1110 
1111 /// X86SelectStore - Select and emit code to implement store instructions.
1112 bool X86FastISel::X86SelectStore(const Instruction *I) {
1113  // Atomic stores need special handling.
1114  const StoreInst *S = cast<StoreInst>(I);
1115 
1116  if (S->isAtomic())
1117  return false;
1118 
1119  const Value *PtrV = I->getOperand(1);
1120  if (TLI.supportSwiftError()) {
1121  // Swifterror values can come from either a function parameter with
1122  // swifterror attribute or an alloca with swifterror attribute.
1123  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
1124  if (Arg->hasSwiftErrorAttr())
1125  return false;
1126  }
1127 
1128  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
1129  if (Alloca->isSwiftError())
1130  return false;
1131  }
1132  }
1133 
1134  const Value *Val = S->getValueOperand();
1135  const Value *Ptr = S->getPointerOperand();
1136 
1137  MVT VT;
1138  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
1139  return false;
1140 
1141  unsigned Alignment = S->getAlignment();
1142  unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
1143  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1144  Alignment = ABIAlignment;
1145  bool Aligned = Alignment >= ABIAlignment;
1146 
1147  X86AddressMode AM;
1148  if (!X86SelectAddress(Ptr, AM))
1149  return false;
1150 
1151  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
1152 }
1153 
1154 /// X86SelectRet - Select and emit code to implement ret instructions.
1155 bool X86FastISel::X86SelectRet(const Instruction *I) {
1156  const ReturnInst *Ret = cast<ReturnInst>(I);
1157  const Function &F = *I->getParent()->getParent();
1158  const X86MachineFunctionInfo *X86MFInfo =
1159  FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
1160 
1161  if (!FuncInfo.CanLowerReturn)
1162  return false;
1163 
1164  if (TLI.supportSwiftError() &&
1165  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
1166  return false;
1167 
1168  if (TLI.supportSplitCSR(FuncInfo.MF))
1169  return false;
1170 
1171  CallingConv::ID CC = F.getCallingConv();
1172  if (CC != CallingConv::C &&
1173  CC != CallingConv::Fast &&
1174  CC != CallingConv::X86_FastCall &&
1175  CC != CallingConv::X86_StdCall &&
1176  CC != CallingConv::X86_ThisCall &&
1177  CC != CallingConv::X86_64_SysV &&
1178  CC != CallingConv::Win64)
1179  return false;
1180 
1181  // Don't handle popping bytes if they don't fit the ret's immediate.
1182  if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
1183  return false;
1184 
1185  // fastcc with -tailcallopt is intended to provide a guaranteed
1186  // tail call optimization. Fastisel doesn't know how to do that.
1187  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
1188  return false;
1189 
1190  // Let SDISel handle vararg functions.
1191  if (F.isVarArg())
1192  return false;
1193 
1194  // Build a list of return value registers.
1195  SmallVector<unsigned, 4> RetRegs;
1196 
1197  if (Ret->getNumOperands() > 0) {
1198  SmallVector<ISD::OutputArg, 4> Outs;
1199  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1200 
1201  // Analyze operands of the call, assigning locations to each operand.
1202  SmallVector<CCValAssign, 16> ValLocs;
1203  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
1204  CCInfo.AnalyzeReturn(Outs, RetCC_X86);
1205 
1206  const Value *RV = Ret->getOperand(0);
1207  unsigned Reg = getRegForValue(RV);
1208  if (Reg == 0)
1209  return false;
1210 
1211  // Only handle a single return value for now.
1212  if (ValLocs.size() != 1)
1213  return false;
1214 
1215  CCValAssign &VA = ValLocs[0];
1216 
1217  // Don't bother handling odd stuff for now.
1218  if (VA.getLocInfo() != CCValAssign::Full)
1219  return false;
1220  // Only handle register returns for now.
1221  if (!VA.isRegLoc())
1222  return false;
1223 
1224  // The calling-convention tables for x87 returns don't tell
1225  // the whole story.
1226  if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
1227  return false;
1228 
1229  unsigned SrcReg = Reg + VA.getValNo();
1230  EVT SrcVT = TLI.getValueType(DL, RV->getType());
1231  EVT DstVT = VA.getValVT();
1232  // Special handling for extended integers.
1233  if (SrcVT != DstVT) {
1234  if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
1235  return false;
1236 
1237  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
1238  return false;
1239 
1240  assert(DstVT == MVT::i32 && "X86 should always ext to i32");
1241 
1242  if (SrcVT == MVT::i1) {
1243  if (Outs[0].Flags.isSExt())
1244  return false;
1245  SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
1246  SrcVT = MVT::i8;
1247  }
1248  unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
1249  ISD::SIGN_EXTEND;
1250  SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
1251  SrcReg, /*TODO: Kill=*/false);
1252  }
1253 
1254  // Make the copy.
1255  unsigned DstReg = VA.getLocReg();
1256  const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
1257  // Avoid a cross-class copy. This is very unlikely.
1258  if (!SrcRC->contains(DstReg))
1259  return false;
1260  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1261  TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
1262 
1263  // Add register to return instruction.
1264  RetRegs.push_back(VA.getLocReg());
1265  }
1266 
1267  // Swift calling convention does not require we copy the sret argument
1268  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
1269 
1270  // All x86 ABIs require that for returning structs by value we copy
1271  // the sret argument into %rax/%eax (depending on ABI) for the return.
1272  // We saved the argument into a virtual register in the entry block,
1273  // so now we copy the value out and into %rax/%eax.
1274  if (F.hasStructRetAttr() && CC != CallingConv::Swift) {
1275  unsigned Reg = X86MFInfo->getSRetReturnReg();
1276  assert(Reg &&
1277  "SRetReturnReg should have been set in LowerFormalArguments()!");
1278  unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
1279  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1280  TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
1281  RetRegs.push_back(RetReg);
1282  }
1283 
1284  // Now emit the RET.
1285  MachineInstrBuilder MIB;
1286  if (X86MFInfo->getBytesToPopOnReturn()) {
1287  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1288  TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL))
1289  .addImm(X86MFInfo->getBytesToPopOnReturn());
1290  } else {
1291  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1292  TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
1293  }
1294  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1295  MIB.addReg(RetRegs[i], RegState::Implicit);
1296  return true;
1297 }
1298 
1299 /// X86SelectLoad - Select and emit code to implement load instructions.
1300 ///
1301 bool X86FastISel::X86SelectLoad(const Instruction *I) {
1302  const LoadInst *LI = cast<LoadInst>(I);
1303 
1304  // Atomic loads need special handling.
1305  if (LI->isAtomic())
1306  return false;
1307 
1308  const Value *SV = I->getOperand(0);
1309  if (TLI.supportSwiftError()) {
1310  // Swifterror values can come from either a function parameter with
1311  // swifterror attribute or an alloca with swifterror attribute.
1312  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1313  if (Arg->hasSwiftErrorAttr())
1314  return false;
1315  }
1316 
1317  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1318  if (Alloca->isSwiftError())
1319  return false;
1320  }
1321  }
1322 
1323  MVT VT;
1324  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
1325  return false;
1326 
1327  const Value *Ptr = LI->getPointerOperand();
1328 
1329  X86AddressMode AM;
1330  if (!X86SelectAddress(Ptr, AM))
1331  return false;
1332 
1333  unsigned Alignment = LI->getAlignment();
1334  unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
1335  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1336  Alignment = ABIAlignment;
1337 
1338  unsigned ResultReg = 0;
1339  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
1340  Alignment))
1341  return false;
1342 
1343  updateValueMap(I, ResultReg);
1344  return true;
1345 }
1346 
1347 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
1348  bool HasAVX512 = Subtarget->hasAVX512();
1349  bool HasAVX = Subtarget->hasAVX();
1350  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
1351  bool X86ScalarSSEf64 = Subtarget->hasSSE2();
1352 
1353  switch (VT.getSimpleVT().SimpleTy) {
1354  default: return 0;
1355  case MVT::i8: return X86::CMP8rr;
1356  case MVT::i16: return X86::CMP16rr;
1357  case MVT::i32: return X86::CMP32rr;
1358  case MVT::i64: return X86::CMP64rr;
1359  case MVT::f32:
1360  return X86ScalarSSEf32
1361  ? (HasAVX512 ? X86::VUCOMISSZrr
1362  : HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr)
1363  : 0;
1364  case MVT::f64:
1365  return X86ScalarSSEf64
1366  ? (HasAVX512 ? X86::VUCOMISDZrr
1367  : HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr)
1368  : 0;
1369  }
1370 }
1371 
1372 /// If we have a comparison whose RHS is the constant integer RHSC, return an
1373 /// opcode that folds it into the compare as an immediate (e.g. CMP32ri); otherwise return 0.
1374 static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
1375  int64_t Val = RHSC->getSExtValue();
1376  switch (VT.getSimpleVT().SimpleTy) {
1377  // Otherwise, we can't fold the immediate into this comparison.
1378  default:
1379  return 0;
1380  case MVT::i8:
1381  return X86::CMP8ri;
1382  case MVT::i16:
1383  if (isInt<8>(Val))
1384  return X86::CMP16ri8;
1385  return X86::CMP16ri;
1386  case MVT::i32:
1387  if (isInt<8>(Val))
1388  return X86::CMP32ri8;
1389  return X86::CMP32ri;
1390  case MVT::i64:
1391  if (isInt<8>(Val))
1392  return X86::CMP64ri8;
1393  // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
1394  // field.
1395  if (isInt<32>(Val))
1396  return X86::CMP64ri32;
1397  return 0;
1398  }
1399 }
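// Behaviour of the immediate chooser above for a few values (illustrative):
//   i16 compare against 100        -> X86::CMP16ri8  (fits in a signed i8)
//   i32 compare against 100000     -> X86::CMP32ri
//   i64 compare against 100000     -> X86::CMP64ri32 (fits in a signed i32)
//   i64 compare against 2^40       -> 0, so the caller falls back to CMP64rr
//                                     with the constant materialized first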
1400 
1401 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
1402  const DebugLoc &CurDbgLoc) {
1403  unsigned Op0Reg = getRegForValue(Op0);
1404  if (Op0Reg == 0) return false;
1405 
1406  // Handle 'null' like i32/i64 0.
1407  if (isa<ConstantPointerNull>(Op1))
1408  Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
1409 
1410  // We have two options: compare with register or immediate. If the RHS of
1411  // the compare is an immediate that we can fold into this compare, use
1412  // CMPri, otherwise use CMPrr.
1413  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1414  if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
1415  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
1416  .addReg(Op0Reg)
1417  .addImm(Op1C->getSExtValue());
1418  return true;
1419  }
1420  }
1421 
1422  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
1423  if (CompareOpc == 0) return false;
1424 
1425  unsigned Op1Reg = getRegForValue(Op1);
1426  if (Op1Reg == 0) return false;
1427  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
1428  .addReg(Op0Reg)
1429  .addReg(Op1Reg);
1430 
1431  return true;
1432 }
1433 
1434 bool X86FastISel::X86SelectCmp(const Instruction *I) {
1435  const CmpInst *CI = cast<CmpInst>(I);
1436 
1437  MVT VT;
1438  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
1439  return false;
1440 
1441  // Try to optimize or fold the cmp.
1442  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1443  unsigned ResultReg = 0;
1444  switch (Predicate) {
1445  default: break;
1446  case CmpInst::FCMP_FALSE: {
1447  ResultReg = createResultReg(&X86::GR32RegClass);
1448  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
1449  ResultReg);
1450  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
1451  X86::sub_8bit);
1452  if (!ResultReg)
1453  return false;
1454  break;
1455  }
1456  case CmpInst::FCMP_TRUE: {
1457  ResultReg = createResultReg(&X86::GR8RegClass);
1458  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
1459  ResultReg).addImm(1);
1460  break;
1461  }
1462  }
1463 
1464  if (ResultReg) {
1465  updateValueMap(I, ResultReg);
1466  return true;
1467  }
1468 
1469  const Value *LHS = CI->getOperand(0);
1470  const Value *RHS = CI->getOperand(1);
1471 
1472  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1473  // We don't have to materialize a zero constant for this case and can just use
1474  // %x again on the RHS.
1475  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1476  const auto *RHSC = dyn_cast<ConstantFP>(RHS);
1477  if (RHSC && RHSC->isNullValue())
1478  RHS = LHS;
1479  }
1480 
1481  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
1482  static const uint16_t SETFOpcTable[2][3] = {
1483  { X86::SETEr, X86::SETNPr, X86::AND8rr },
1484  { X86::SETNEr, X86::SETPr, X86::OR8rr }
1485  };
1486  const uint16_t *SETFOpc = nullptr;
1487  switch (Predicate) {
1488  default: break;
1489  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
1490  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
1491  }
1492 
1493  ResultReg = createResultReg(&X86::GR8RegClass);
1494  if (SETFOpc) {
1495  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1496  return false;
1497 
1498  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
1499  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
1500  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
1501  FlagReg1);
1502  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
1503  FlagReg2);
1504  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
1505  ResultReg).addReg(FlagReg1).addReg(FlagReg2);
1506  updateValueMap(I, ResultReg);
1507  return true;
1508  }
1509 
1510  X86::CondCode CC;
1511  bool SwapArgs;
1512  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1513  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1514  unsigned Opc = X86::getSETFromCond(CC);
1515 
1516  if (SwapArgs)
1517  std::swap(LHS, RHS);
1518 
1519  // Emit a compare of LHS/RHS.
1520  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1521  return false;
1522 
1523  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
1524  updateValueMap(I, ResultReg);
1525  return true;
1526 }
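// For reference, the FCMP_OEQ path above expands to roughly this sequence
// (register names are placeholders, shown only to illustrate the idea):
//
//   ucomiss  ...        ; X86FastEmitCompare sets ZF/PF/CF
//   sete     %r1        ; SETFOpc[0]: equal
//   setnp    %r2        ; SETFOpc[1]: ordered (PF clear)
//   andb     %r2, %r1   ; SETFOpc[2]: both must hold for "oeq"
//
// FCMP_UNE uses SETNE/SETP combined with OR8rr instead.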
1527 
1528 bool X86FastISel::X86SelectZExt(const Instruction *I) {
1529  EVT DstVT = TLI.getValueType(DL, I->getType());
1530  if (!TLI.isTypeLegal(DstVT))
1531  return false;
1532 
1533  unsigned ResultReg = getRegForValue(I->getOperand(0));
1534  if (ResultReg == 0)
1535  return false;
1536 
1537  // Handle zero-extension from i1 to i8, which is common.
1538  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1539  if (SrcVT == MVT::i1) {
1540  // Set the high bits to zero.
1541  ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
1542  SrcVT = MVT::i8;
1543 
1544  if (ResultReg == 0)
1545  return false;
1546  }
1547 
1548  if (DstVT == MVT::i64) {
1549  // Handle extension to 64-bits via sub-register shenanigans.
1550  unsigned MovInst;
1551 
1552  switch (SrcVT.SimpleTy) {
1553  case MVT::i8: MovInst = X86::MOVZX32rr8; break;
1554  case MVT::i16: MovInst = X86::MOVZX32rr16; break;
1555  case MVT::i32: MovInst = X86::MOV32rr; break;
1556  default: llvm_unreachable("Unexpected zext to i64 source type");
1557  }
1558 
1559  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1560  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
1561  .addReg(ResultReg);
1562 
1563  ResultReg = createResultReg(&X86::GR64RegClass);
1564  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
1565  ResultReg)
1566  .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
1567  } else if (DstVT == MVT::i16) {
1568  // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
1569  // extend to 32-bits and then extract down to 16-bits.
1570  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1571  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
1572  Result32).addReg(ResultReg);
1573 
1574  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1575  X86::sub_16bit);
1576  } else if (DstVT != MVT::i8) {
1577  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1578  ResultReg, /*Kill=*/true);
1579  if (ResultReg == 0)
1580  return false;
1581  }
1582 
1583  updateValueMap(I, ResultReg);
1584  return true;
1585 }
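// Sketch of the zext i1 -> i64 path above (machine-instruction shape only;
// the exact lowering of fastEmitZExtFromI1 is assumed):
//
//   %r8  = and8ri %src, 1                      ; clear the high bits of the i1
//   %r32 = movzx32rr8 %r8                      ; zero-extend i8 -> i32
//   %r64 = subreg_to_reg 0, %r32, sub_32bit    ; upper 32 bits implicitly zero
//
// The i8 -> i16 case instead zero-extends to 32 bits and extracts sub_16bit,
// since there is no direct i8 -> i16 zext pattern in the generated table.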
1586 
1587 bool X86FastISel::X86SelectSExt(const Instruction *I) {
1588  EVT DstVT = TLI.getValueType(DL, I->getType());
1589  if (!TLI.isTypeLegal(DstVT))
1590  return false;
1591 
1592  unsigned ResultReg = getRegForValue(I->getOperand(0));
1593  if (ResultReg == 0)
1594  return false;
1595 
1596  // Handle sign-extension from i1 to i8.
1597  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1598  if (SrcVT == MVT::i1) {
1599  // Set the high bits to zero.
1600  unsigned ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg,
1601  /*TODO: Kill=*/false);
1602  if (ZExtReg == 0)
1603  return false;
1604 
1605  // Negate the result to make an 8-bit sign extended value.
1606  ResultReg = createResultReg(&X86::GR8RegClass);
1607  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r),
1608  ResultReg).addReg(ZExtReg);
1609 
1610  SrcVT = MVT::i8;
1611  }
1612 
1613  if (DstVT == MVT::i16) {
1614  // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
1615  // extend to 32-bits and then extract down to 16-bits.
1616  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1617  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
1618  Result32).addReg(ResultReg);
1619 
1620  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1621  X86::sub_16bit);
1622  } else if (DstVT != MVT::i8) {
1623  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
1624  ResultReg, /*Kill=*/true);
1625  if (ResultReg == 0)
1626  return false;
1627  }
1628 
1629  updateValueMap(I, ResultReg);
1630  return true;
1631 }
1632 
1633 bool X86FastISel::X86SelectBranch(const Instruction *I) {
1634  // Unconditional branches are selected by tablegen-generated code.
1635  // Handle a conditional branch.
1636  const BranchInst *BI = cast<BranchInst>(I);
1637  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1638  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1639 
1640  // Fold the common case of a conditional branch with a comparison
1641  // in the same block (values defined in other blocks may not have
1642  // initialized registers).
1643  X86::CondCode CC;
1644  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1645  if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
1646  EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1647 
1648  // Try to optimize or fold the cmp.
1649  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1650  switch (Predicate) {
1651  default: break;
1652  case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
1653  case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true;
1654  }
1655 
1656  const Value *CmpLHS = CI->getOperand(0);
1657  const Value *CmpRHS = CI->getOperand(1);
1658 
1659  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
1660  // 0.0.
1661  // We don't have to materialize a zero constant for this case and can just
1662  // use %x again on the RHS.
1663  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1664  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1665  if (CmpRHSC && CmpRHSC->isNullValue())
1666  CmpRHS = CmpLHS;
1667  }
1668 
1669  // Try to take advantage of fallthrough opportunities.
1670  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1671  std::swap(TrueMBB, FalseMBB);
1672  Predicate = CmpInst::getInversePredicate(Predicate);
1673  }
1674 
1675  // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
1676  // code check. Instead two branch instructions are required to check all
1677  // the flags. First we change the predicate to a supported condition code,
1678  // which will be used by the first branch. Later on we will emit the second
1679  // branch.
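 // Illustrative sketch (labels and registers arbitrary): for
 //   br (fcmp une float %a, %b), %t, %f
 // the emitted code is roughly
 //   ucomiss %xmm1, %xmm0
 //   jne     .LBB_t          ; "not equal" part of UNE
 //   jp      .LBB_t          ; unordered part of UNE
 //   jmp     .LBB_f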
1680  bool NeedExtraBranch = false;
1681  switch (Predicate) {
1682  default: break;
1683  case CmpInst::FCMP_OEQ:
1684  std::swap(TrueMBB, FalseMBB);
1685  LLVM_FALLTHROUGH;
1686  case CmpInst::FCMP_UNE:
1687  NeedExtraBranch = true;
1688  Predicate = CmpInst::FCMP_ONE;
1689  break;
1690  }
1691 
1692  bool SwapArgs;
1693  unsigned BranchOpc;
1694  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1695  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1696 
1697  BranchOpc = X86::GetCondBranchFromCond(CC);
1698  if (SwapArgs)
1699  std::swap(CmpLHS, CmpRHS);
1700 
1701  // Emit a compare of the LHS and RHS, setting the flags.
1702  if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
1703  return false;
1704 
1705  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
1706  .addMBB(TrueMBB);
1707 
1708  // X86 requires a second branch to handle UNE (and OEQ, which is mapped
1709  // to UNE above).
1710  if (NeedExtraBranch) {
1711  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_1))
1712  .addMBB(TrueMBB);
1713  }
1714 
1715  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1716  return true;
1717  }
1718  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1719  // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1720  // typically happen for _Bool and C++ bools.
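 // Illustrative sketch: such a branch is lowered to roughly "testl $1, %x"
 // followed by a conditional jump (jne, or je after swapping successors to
 // exploit fallthrough), so only the low bit of %x is actually inspected.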
1721  MVT SourceVT;
1722  if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1723  isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1724  unsigned TestOpc = 0;
1725  switch (SourceVT.SimpleTy) {
1726  default: break;
1727  case MVT::i8: TestOpc = X86::TEST8ri; break;
1728  case MVT::i16: TestOpc = X86::TEST16ri; break;
1729  case MVT::i32: TestOpc = X86::TEST32ri; break;
1730  case MVT::i64: TestOpc = X86::TEST64ri32; break;
1731  }
1732  if (TestOpc) {
1733  unsigned OpReg = getRegForValue(TI->getOperand(0));
1734  if (OpReg == 0) return false;
1735 
1736  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
1737  .addReg(OpReg).addImm(1);
1738 
1739  unsigned JmpOpc = X86::JNE_1;
1740  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1741  std::swap(TrueMBB, FalseMBB);
1742  JmpOpc = X86::JE_1;
1743  }
1744 
1745  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
1746  .addMBB(TrueMBB);
1747 
1748  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1749  return true;
1750  }
1751  }
1752  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
1753  // Fake-request the condition; otherwise the intrinsic might be completely
1754  // optimized away.
1755  unsigned TmpReg = getRegForValue(BI->getCondition());
1756  if (TmpReg == 0)
1757  return false;
1758 
1759  unsigned BranchOpc = X86::GetCondBranchFromCond(CC);
1760 
1761  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
1762  .addMBB(TrueMBB);
1763  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1764  return true;
1765  }
1766 
1767  // Otherwise do a clumsy setcc and re-test it.
1768  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1769  // in an explicit cast, so make sure to handle that correctly.
1770  unsigned OpReg = getRegForValue(BI->getCondition());
1771  if (OpReg == 0) return false;
1772 
1773  // In case OpReg is a K register, COPY to a GPR
1774  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1775  unsigned KOpReg = OpReg;
1776  OpReg = createResultReg(&X86::GR32RegClass);
1777  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1778  TII.get(TargetOpcode::COPY), OpReg)
1779  .addReg(KOpReg);
1780  OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
1781  X86::sub_8bit);
1782  }
1783  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1784  .addReg(OpReg)
1785  .addImm(1);
1786  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
1787  .addMBB(TrueMBB);
1788  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1789  return true;
1790 }
1791 
1792 bool X86FastISel::X86SelectShift(const Instruction *I) {
1793  unsigned CReg = 0, OpReg = 0;
1794  const TargetRegisterClass *RC = nullptr;
1795  if (I->getType()->isIntegerTy(8)) {
1796  CReg = X86::CL;
1797  RC = &X86::GR8RegClass;
1798  switch (I->getOpcode()) {
1799  case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1800  case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1801  case Instruction::Shl: OpReg = X86::SHL8rCL; break;
1802  default: return false;
1803  }
1804  } else if (I->getType()->isIntegerTy(16)) {
1805  CReg = X86::CX;
1806  RC = &X86::GR16RegClass;
1807  switch (I->getOpcode()) {
1808  default: llvm_unreachable("Unexpected shift opcode");
1809  case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1810  case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1811  case Instruction::Shl: OpReg = X86::SHL16rCL; break;
1812  }
1813  } else if (I->getType()->isIntegerTy(32)) {
1814  CReg = X86::ECX;
1815  RC = &X86::GR32RegClass;
1816  switch (I->getOpcode()) {
1817  default: llvm_unreachable("Unexpected shift opcode");
1818  case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1819  case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1820  case Instruction::Shl: OpReg = X86::SHL32rCL; break;
1821  }
1822  } else if (I->getType()->isIntegerTy(64)) {
1823  CReg = X86::RCX;
1824  RC = &X86::GR64RegClass;
1825  switch (I->getOpcode()) {
1826  default: llvm_unreachable("Unexpected shift opcode");
1827  case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1828  case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1829  case Instruction::Shl: OpReg = X86::SHL64rCL; break;
1830  }
1831  } else {
1832  return false;
1833  }
1834 
1835  MVT VT;
1836  if (!isTypeLegal(I->getType(), VT))
1837  return false;
1838 
1839  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1840  if (Op0Reg == 0) return false;
1841 
1842  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1843  if (Op1Reg == 0) return false;
1844  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
1845  CReg).addReg(Op1Reg);
1846 
1847  // The shift instruction uses X86::CL. If we defined a super-register
1848  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
1849  if (CReg != X86::CL)
1850  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1851  TII.get(TargetOpcode::KILL), X86::CL)
1852  .addReg(CReg, RegState::Kill);
1853 
1854  unsigned ResultReg = createResultReg(RC);
1855  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
1856  .addReg(Op0Reg);
1857  updateValueMap(I, ResultReg);
1858  return true;
1859 }
1860 
1861 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1862  const static unsigned NumTypes = 4; // i8, i16, i32, i64
1863  const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1864  const static bool S = true; // IsSigned
1865  const static bool U = false; // !IsSigned
1866  const static unsigned Copy = TargetOpcode::COPY;
1867  // For the X86 DIV/IDIV instruction, in most cases the dividend
1868  // (numerator) must be in a specific register pair highreg:lowreg,
1869  // producing the quotient in lowreg and the remainder in highreg.
1870  // For most data types, to set up the instruction, the dividend is
1871  // copied into lowreg, and lowreg is sign-extended or zero-extended
1872  // into highreg. The exception is i8, where the dividend is defined
1873  // as a single register rather than a register pair, and we
1874  // therefore directly sign-extend or zero-extend the dividend into
1875  // lowreg, instead of copying, and ignore the highreg.
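 // Illustrative sketch for a 32-bit sdiv (registers arbitrary):
 //   movl  %edi, %eax
 //   cltd                ; CDQ: sign-extend EAX into EDX
 //   idivl %esi          ; quotient -> EAX, remainder -> EDX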
1876  const static struct DivRemEntry {
1877  // The following portion depends only on the data type.
1878  const TargetRegisterClass *RC;
1879  unsigned LowInReg; // low part of the register pair
1880  unsigned HighInReg; // high part of the register pair
1881  // The following portion depends on both the data type and the operation.
1882  struct DivRemResult {
1883  unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1884  unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1885  // highreg, or copying a zero into highreg.
1886  unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1887  // zero/sign-extending into lowreg for i8.
1888  unsigned DivRemResultReg; // Register containing the desired result.
1889  bool IsOpSigned; // Whether to use signed or unsigned form.
1890  } ResultTable[NumOps];
1891  } OpTable[NumTypes] = {
1892  { &X86::GR8RegClass, X86::AX, 0, {
1893  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
1894  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
1895  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
1896  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
1897  }
1898  }, // i8
1899  { &X86::GR16RegClass, X86::AX, X86::DX, {
1900  { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
1901  { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
1902  { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
1903  { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
1904  }
1905  }, // i16
1906  { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1907  { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
1908  { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
1909  { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
1910  { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
1911  }
1912  }, // i32
1913  { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1914  { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
1915  { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
1916  { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
1917  { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
1918  }
1919  }, // i64
1920  };
1921 
1922  MVT VT;
1923  if (!isTypeLegal(I->getType(), VT))
1924  return false;
1925 
1926  unsigned TypeIndex, OpIndex;
1927  switch (VT.SimpleTy) {
1928  default: return false;
1929  case MVT::i8: TypeIndex = 0; break;
1930  case MVT::i16: TypeIndex = 1; break;
1931  case MVT::i32: TypeIndex = 2; break;
1932  case MVT::i64: TypeIndex = 3;
1933  if (!Subtarget->is64Bit())
1934  return false;
1935  break;
1936  }
1937 
1938  switch (I->getOpcode()) {
1939  default: llvm_unreachable("Unexpected div/rem opcode");
1940  case Instruction::SDiv: OpIndex = 0; break;
1941  case Instruction::SRem: OpIndex = 1; break;
1942  case Instruction::UDiv: OpIndex = 2; break;
1943  case Instruction::URem: OpIndex = 3; break;
1944  }
1945 
1946  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1947  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1948  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1949  if (Op0Reg == 0)
1950  return false;
1951  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1952  if (Op1Reg == 0)
1953  return false;
1954 
1955  // Move op0 into low-order input register.
1956  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1957  TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1958  // Zero-extend or sign-extend into high-order input register.
1959  if (OpEntry.OpSignExtend) {
1960  if (OpEntry.IsOpSigned)
1961  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1962  TII.get(OpEntry.OpSignExtend));
1963  else {
1964  unsigned Zero32 = createResultReg(&X86::GR32RegClass);
1965  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1966  TII.get(X86::MOV32r0), Zero32);
1967 
1968  // Copy the zero into the appropriate sub/super/identical physical
1969  // register. Unfortunately the operations needed are not uniform enough
1970  // to fit neatly into the table above.
1971  if (VT == MVT::i16) {
1972  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1973  TII.get(Copy), TypeEntry.HighInReg)
1974  .addReg(Zero32, 0, X86::sub_16bit);
1975  } else if (VT == MVT::i32) {
1976  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1977  TII.get(Copy), TypeEntry.HighInReg)
1978  .addReg(Zero32);
1979  } else if (VT == MVT::i64) {
1980  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1981  TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1982  .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1983  }
1984  }
1985  }
1986  // Generate the DIV/IDIV instruction.
1987  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1988  TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1989  // For i8 remainder, we can't reference ah directly, as we'll end
1990  // up with bogus copies like %r9b = COPY %ah. Reference ax
1991  // instead to prevent ah references in a rex instruction.
1992  //
1993  // The current assumption of the fast register allocator is that isel
1994  // won't generate explicit references to the GR8_NOREX registers. If
1995  // the allocator and/or the backend get enhanced to be more robust in
1996  // that regard, this can be, and should be, removed.
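 // Illustrative sketch: instead of "movb %ah, %cl" we copy AX into a 16-bit
 // vreg, emit "shrw $8" on it, and then take its low 8-bit sub-register.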
1997  unsigned ResultReg = 0;
1998  if ((I->getOpcode() == Instruction::SRem ||
1999  I->getOpcode() == Instruction::URem) &&
2000  OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
2001  unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
2002  unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
2003  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2004  TII.get(Copy), SourceSuperReg).addReg(X86::AX);
2005 
2006  // Shift AX right by 8 bits instead of using AH.
2007  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
2008  ResultSuperReg).addReg(SourceSuperReg).addImm(8);
2009 
2010  // Now reference the 8-bit subreg of the result.
2011  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
2012  /*Kill=*/true, X86::sub_8bit);
2013  }
2014  // Copy the result out of the physreg if we haven't already.
2015  if (!ResultReg) {
2016  ResultReg = createResultReg(TypeEntry.RC);
2017  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
2018  .addReg(OpEntry.DivRemResultReg);
2019  }
2020  updateValueMap(I, ResultReg);
2021 
2022  return true;
2023 }
2024 
2025 /// Emit a conditional move instruction (if they are supported) to lower
2026 /// the select.
2027 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
2028  // Check if the subtarget supports these instructions.
2029  if (!Subtarget->hasCMov())
2030  return false;
2031 
2032  // FIXME: Add support for i8.
2033  if (RetVT < MVT::i16 || RetVT > MVT::i64)
2034  return false;
2035 
2036  const Value *Cond = I->getOperand(0);
2037  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2038  bool NeedTest = true;
2039  X86::CondCode CC = X86::COND_NE;
2040 
2041  // Optimize conditions coming from a compare if both instructions are in the
2042  // same basic block (values defined in other basic blocks may not have
2043  // initialized registers).
2044  const auto *CI = dyn_cast<CmpInst>(Cond);
2045  if (CI && (CI->getParent() == I->getParent())) {
2046  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2047 
2048  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
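 // Illustrative sketch: OEQ is computed as "setnp; sete; test" (true only if
 // both flag bits are set), UNE as "setp; setne; or" (true if either is set).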
2049  static const uint16_t SETFOpcTable[2][3] = {
2050  { X86::SETNPr, X86::SETEr , X86::TEST8rr },
2051  { X86::SETPr, X86::SETNEr, X86::OR8rr }
2052  };
2053  const uint16_t *SETFOpc = nullptr;
2054  switch (Predicate) {
2055  default: break;
2056  case CmpInst::FCMP_OEQ:
2057  SETFOpc = &SETFOpcTable[0][0];
2058  Predicate = CmpInst::ICMP_NE;
2059  break;
2060  case CmpInst::FCMP_UNE:
2061  SETFOpc = &SETFOpcTable[1][0];
2062  Predicate = CmpInst::ICMP_NE;
2063  break;
2064  }
2065 
2066  bool NeedSwap;
2067  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
2068  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
2069 
2070  const Value *CmpLHS = CI->getOperand(0);
2071  const Value *CmpRHS = CI->getOperand(1);
2072  if (NeedSwap)
2073  std::swap(CmpLHS, CmpRHS);
2074 
2075  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2076  // Emit a compare of the LHS and RHS, setting the flags.
2077  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2078  return false;
2079 
2080  if (SETFOpc) {
2081  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
2082  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
2083  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
2084  FlagReg1);
2085  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
2086  FlagReg2);
2087  auto const &II = TII.get(SETFOpc[2]);
2088  if (II.getNumDefs()) {
2089  unsigned TmpReg = createResultReg(&X86::GR8RegClass);
2090  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
2091  .addReg(FlagReg2).addReg(FlagReg1);
2092  } else {
2093  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2094  .addReg(FlagReg2).addReg(FlagReg1);
2095  }
2096  }
2097  NeedTest = false;
2098  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
2099  // Fake-request the condition; otherwise the intrinsic might be completely
2100  // optimized away.
2101  unsigned TmpReg = getRegForValue(Cond);
2102  if (TmpReg == 0)
2103  return false;
2104 
2105  NeedTest = false;
2106  }
2107 
2108  if (NeedTest) {
2109  // Selects operate on i1; however, CondReg is 8 bits wide and may contain
2110  // garbage. Only the least significant bit is supposed to be accurate, so
2111  // if we read more than the lsb we may see non-zero values where the lsb
2112  // is zero. Therefore, we have to truncate CondReg to i1 for the select.
2113  // This is achieved by performing a TEST against 1.
2114  unsigned CondReg = getRegForValue(Cond);
2115  if (CondReg == 0)
2116  return false;
2117  bool CondIsKill = hasTrivialKill(Cond);
2118 
2119  // In case OpReg is a K register, COPY to a GPR
2120  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2121  unsigned KCondReg = CondReg;
2122  CondReg = createResultReg(&X86::GR32RegClass);
2123  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2124  TII.get(TargetOpcode::COPY), CondReg)
2125  .addReg(KCondReg, getKillRegState(CondIsKill));
2126  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2127  X86::sub_8bit);
2128  }
2129  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2130  .addReg(CondReg, getKillRegState(CondIsKill))
2131  .addImm(1);
2132  }
2133 
2134  const Value *LHS = I->getOperand(1);
2135  const Value *RHS = I->getOperand(2);
2136 
2137  unsigned RHSReg = getRegForValue(RHS);
2138  bool RHSIsKill = hasTrivialKill(RHS);
2139 
2140  unsigned LHSReg = getRegForValue(LHS);
2141  bool LHSIsKill = hasTrivialKill(LHS);
2142 
2143  if (!LHSReg || !RHSReg)
2144  return false;
2145 
2146  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
2147  unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8);
2148  unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
2149  LHSReg, LHSIsKill);
2150  updateValueMap(I, ResultReg);
2151  return true;
2152 }
2153 
2154 /// Emit SSE or AVX instructions to lower the select.
2155 ///
2156 /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
2157 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
2158 /// SSE instructions are available. If AVX is available, try to use a VBLENDV.
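/// Illustrative sketch of the plain-SSE path for an f32 select (operand order
/// simplified, registers arbitrary):
///   cmpltss %rhs, %mask    ; all-ones where the predicate holds, else zeros
///   andps   %lhs, %mask    ; mask & LHS
///   andnps  %rhs, %maskN   ; ~mask & RHS
///   orps    ...            ; combine the two halves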
2159 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
2160  // Optimize conditions coming from a compare if both instructions are in the
2161  // same basic block (values defined in other basic blocks may not have
2162  // initialized registers).
2163  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
2164  if (!CI || (CI->getParent() != I->getParent()))
2165  return false;
2166 
2167  if (I->getType() != CI->getOperand(0)->getType() ||
2168  !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
2169  (Subtarget->hasSSE2() && RetVT == MVT::f64)))
2170  return false;
2171 
2172  const Value *CmpLHS = CI->getOperand(0);
2173  const Value *CmpRHS = CI->getOperand(1);
2174  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2175 
2176  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
2177  // We don't have to materialize a zero constant for this case and can just use
2178  // %x again on the RHS.
2179  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
2180  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
2181  if (CmpRHSC && CmpRHSC->isNullValue())
2182  CmpRHS = CmpLHS;
2183  }
2184 
2185  unsigned CC;
2186  bool NeedSwap;
2187  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
2188  if (CC > 7 && !Subtarget->hasAVX())
2189  return false;
2190 
2191  if (NeedSwap)
2192  std::swap(CmpLHS, CmpRHS);
2193 
2194  // Choose the SSE instruction sequence based on data type (float or double).
2195  static const uint16_t OpcTable[2][4] = {
2196  { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
2197  { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
2198  };
2199 
2200  const uint16_t *Opc = nullptr;
2201  switch (RetVT.SimpleTy) {
2202  default: return false;
2203  case MVT::f32: Opc = &OpcTable[0][0]; break;
2204  case MVT::f64: Opc = &OpcTable[1][0]; break;
2205  }
2206 
2207  const Value *LHS = I->getOperand(1);
2208  const Value *RHS = I->getOperand(2);
2209 
2210  unsigned LHSReg = getRegForValue(LHS);
2211  bool LHSIsKill = hasTrivialKill(LHS);
2212 
2213  unsigned RHSReg = getRegForValue(RHS);
2214  bool RHSIsKill = hasTrivialKill(RHS);
2215 
2216  unsigned CmpLHSReg = getRegForValue(CmpLHS);
2217  bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
2218 
2219  unsigned CmpRHSReg = getRegForValue(CmpRHS);
2220  bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
2221 
2222  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
2223  return false;
2224 
2225  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2226  unsigned ResultReg;
2227 
2228  if (Subtarget->hasAVX512()) {
2229  // If we have AVX512 we can use a mask compare and masked movss/sd.
2230  const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
2231  const TargetRegisterClass *VK1 = &X86::VK1RegClass;
2232 
2233  unsigned CmpOpcode =
2234  (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
2235  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
2236  CmpRHSReg, CmpRHSIsKill, CC);
2237 
2238  // Need an IMPLICIT_DEF for the input that is used to generate the upper
2239  // bits of the result register since it's not based on any of the inputs.
2240  unsigned ImplicitDefReg = createResultReg(VR128X);
2241  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2242  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2243 
2244  // Place RHSReg as the passthru of the masked movss/sd operation and put
2245  // LHS in the input. The mask input comes from the compare.
2246  unsigned MovOpcode =
2247  (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
2248  unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
2249  CmpReg, true, ImplicitDefReg, true,
2250  LHSReg, LHSIsKill);
2251 
2252  ResultReg = createResultReg(RC);
2253  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2254  TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
2255 
2256  } else if (Subtarget->hasAVX()) {
2257  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2258 
2259  // If we have AVX, create 1 blendv instead of 3 logic instructions.
2260  // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
2261  // uses XMM0 as the selection register. That may need just as many
2262  // instructions as the AND/ANDN/OR sequence due to register moves, so
2263  // don't bother.
2264  unsigned CmpOpcode =
2265  (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
2266  unsigned BlendOpcode =
2267  (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
2268 
2269  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
2270  CmpRHSReg, CmpRHSIsKill, CC);
2271  unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
2272  LHSReg, LHSIsKill, CmpReg, true);
2273  ResultReg = createResultReg(RC);
2274  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2275  TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
2276  } else {
2277  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2278  unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
2279  CmpRHSReg, CmpRHSIsKill, CC);
2280  unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
2281  LHSReg, LHSIsKill);
2282  unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
2283  RHSReg, RHSIsKill);
2284  unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
2285  AndReg, /*IsKill=*/true);
2286  ResultReg = createResultReg(RC);
2287  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2288  TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
2289  }
2290  updateValueMap(I, ResultReg);
2291  return true;
2292 }
2293 
2294 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
2295  // These are pseudo CMOV instructions and will be later expanded into control-
2296  // flow.
2297  unsigned Opc;
2298  switch (RetVT.SimpleTy) {
2299  default: return false;
2300  case MVT::i8: Opc = X86::CMOV_GR8; break;
2301  case MVT::i16: Opc = X86::CMOV_GR16; break;
2302  case MVT::i32: Opc = X86::CMOV_GR32; break;
2303  case MVT::f32: Opc = X86::CMOV_FR32; break;
2304  case MVT::f64: Opc = X86::CMOV_FR64; break;
2305  }
2306 
2307  const Value *Cond = I->getOperand(0);
2308  X86::CondCode CC = X86::COND_NE;
2309 
2310  // Optimize conditions coming from a compare if both instructions are in the
2311  // same basic block (values defined in other basic blocks may not have
2312  // initialized registers).
2313  const auto *CI = dyn_cast<CmpInst>(Cond);
2314  if (CI && (CI->getParent() == I->getParent())) {
2315  bool NeedSwap;
2316  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
2317  if (CC > X86::LAST_VALID_COND)
2318  return false;
2319 
2320  const Value *CmpLHS = CI->getOperand(0);
2321  const Value *CmpRHS = CI->getOperand(1);
2322 
2323  if (NeedSwap)
2324  std::swap(CmpLHS, CmpRHS);
2325 
2326  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2327  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2328  return false;
2329  } else {
2330  unsigned CondReg = getRegForValue(Cond);
2331  if (CondReg == 0)
2332  return false;
2333  bool CondIsKill = hasTrivialKill(Cond);
2334 
2335  // In case OpReg is a K register, COPY to a GPR
2336  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2337  unsigned KCondReg = CondReg;
2338  CondReg = createResultReg(&X86::GR32RegClass);
2339  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2340  TII.get(TargetOpcode::COPY), CondReg)
2341  .addReg(KCondReg, getKillRegState(CondIsKill));
2342  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2343  X86::sub_8bit);
2344  }
2345  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2346  .addReg(CondReg, getKillRegState(CondIsKill))
2347  .addImm(1);
2348  }
2349 
2350  const Value *LHS = I->getOperand(1);
2351  const Value *RHS = I->getOperand(2);
2352 
2353  unsigned LHSReg = getRegForValue(LHS);
2354  bool LHSIsKill = hasTrivialKill(LHS);
2355 
2356  unsigned RHSReg = getRegForValue(RHS);
2357  bool RHSIsKill = hasTrivialKill(RHS);
2358 
2359  if (!LHSReg || !RHSReg)
2360  return false;
2361 
2362  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2363 
2364  unsigned ResultReg =
2365  fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
2366  updateValueMap(I, ResultReg);
2367  return true;
2368 }
2369 
2370 bool X86FastISel::X86SelectSelect(const Instruction *I) {
2371  MVT RetVT;
2372  if (!isTypeLegal(I->getType(), RetVT))
2373  return false;
2374 
2375  // Check if we can fold the select.
2376  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2377  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2378  const Value *Opnd = nullptr;
2379  switch (Predicate) {
2380  default: break;
2381  case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
2382  case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
2383  }
2384  // No need for a select anymore - this is an unconditional move.
2385  if (Opnd) {
2386  unsigned OpReg = getRegForValue(Opnd);
2387  if (OpReg == 0)
2388  return false;
2389  bool OpIsKill = hasTrivialKill(Opnd);
2390  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2391  unsigned ResultReg = createResultReg(RC);
2392  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2393  TII.get(TargetOpcode::COPY), ResultReg)
2394  .addReg(OpReg, getKillRegState(OpIsKill));
2395  updateValueMap(I, ResultReg);
2396  return true;
2397  }
2398  }
2399 
2400  // First try to use real conditional move instructions.
2401  if (X86FastEmitCMoveSelect(RetVT, I))
2402  return true;
2403 
2404  // Try to use a sequence of SSE instructions to simulate a conditional move.
2405  if (X86FastEmitSSESelect(RetVT, I))
2406  return true;
2407 
2408  // Fall back to pseudo conditional move instructions, which will later be
2409  // converted to control-flow.
2410  if (X86FastEmitPseudoSelect(RetVT, I))
2411  return true;
2412 
2413  return false;
2414 }
2415 
2416 // Common code for X86SelectSIToFP and X86SelectUIToFP.
2417 bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
2418  // The target-independent selection algorithm in FastISel already knows how
2419  // to select a SINT_TO_FP if the target is SSE but not AVX.
2420  // Early exit if the subtarget doesn't have AVX.
2421  // Unsigned conversion requires avx512.
2422  bool HasAVX512 = Subtarget->hasAVX512();
2423  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
2424  return false;
2425 
2426  // TODO: We could sign extend narrower types.
2427  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
2428  if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
2429  return false;
2430 
2431  // Select integer to float/double conversion.
2432  unsigned OpReg = getRegForValue(I->getOperand(0));
2433  if (OpReg == 0)
2434  return false;
2435 
2436  unsigned Opcode;
2437 
2438  static const uint16_t SCvtOpc[2][2][2] = {
2439  { { X86::VCVTSI2SSrr, X86::VCVTSI642SSrr },
2440  { X86::VCVTSI2SDrr, X86::VCVTSI642SDrr } },
2441  { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
2442  { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
2443  };
2444  static const uint16_t UCvtOpc[2][2] = {
2445  { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
2446  { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
2447  };
2448  bool Is64Bit = SrcVT == MVT::i64;
2449 
2450  if (I->getType()->isDoubleTy()) {
2451  // s/uitofp int -> double
2452  Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
2453  } else if (I->getType()->isFloatTy()) {
2454  // s/uitofp int -> float
2455  Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
2456  } else
2457  return false;
2458 
2459  MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
2460  const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
2461  unsigned ImplicitDefReg = createResultReg(RC);
2462  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2463  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2464  unsigned ResultReg =
2465  fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
2466  updateValueMap(I, ResultReg);
2467  return true;
2468 }
2469 
2470 bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
2471  return X86SelectIntToFP(I, /*IsSigned*/true);
2472 }
2473 
2474 bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
2475  return X86SelectIntToFP(I, /*IsSigned*/false);
2476 }
2477 
2478 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
2479 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
2480  unsigned TargetOpc,
2481  const TargetRegisterClass *RC) {
2482  assert((I->getOpcode() == Instruction::FPExt ||
2483  I->getOpcode() == Instruction::FPTrunc) &&
2484  "Instruction must be an FPExt or FPTrunc!");
2485 
2486  unsigned OpReg = getRegForValue(I->getOperand(0));
2487  if (OpReg == 0)
2488  return false;
2489 
2490  unsigned ImplicitDefReg;
2491  if (Subtarget->hasAVX()) {
2492  ImplicitDefReg = createResultReg(RC);
2493  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2494  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2495 
2496  }
2497 
2498  unsigned ResultReg = createResultReg(RC);
2499  MachineInstrBuilder MIB;
2500  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
2501  ResultReg);
2502 
2503  if (Subtarget->hasAVX())
2504  MIB.addReg(ImplicitDefReg);
2505 
2506  MIB.addReg(OpReg);
2507  updateValueMap(I, ResultReg);
2508  return true;
2509 }
2510 
2511 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2512  if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
2513  I->getOperand(0)->getType()->isFloatTy()) {
2514  bool HasAVX512 = Subtarget->hasAVX512();
2515  // fpext from float to double.
2516  unsigned Opc =
2517  HasAVX512 ? X86::VCVTSS2SDZrr
2518  : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
2519  return X86SelectFPExtOrFPTrunc(
2520  I, Opc, HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass);
2521  }
2522 
2523  return false;
2524 }
2525 
2526 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2527  if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
2528  I->getOperand(0)->getType()->isDoubleTy()) {
2529  bool HasAVX512 = Subtarget->hasAVX512();
2530  // fptrunc from double to float.
2531  unsigned Opc =
2532  HasAVX512 ? X86::VCVTSD2SSZrr
2533  : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
2534  return X86SelectFPExtOrFPTrunc(
2535  I, Opc, HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass);
2536  }
2537 
2538  return false;
2539 }
2540 
2541 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2542  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
2543  EVT DstVT = TLI.getValueType(DL, I->getType());
2544 
2545  // This code only handles truncation to byte.
2546  if (DstVT != MVT::i8 && DstVT != MVT::i1)
2547  return false;
2548  if (!TLI.isTypeLegal(SrcVT))
2549  return false;
2550 
2551  unsigned InputReg = getRegForValue(I->getOperand(0));
2552  if (!InputReg)
2553  // Unhandled operand. Halt "fast" selection and bail.
2554  return false;
2555 
2556  if (SrcVT == MVT::i8) {
2557  // Truncate from i8 to i1; no code needed.
2558  updateValueMap(I, InputReg);
2559  return true;
2560  }
2561 
2562  // Issue an extract_subreg.
2563  unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
2564  InputReg, false,
2565  X86::sub_8bit);
2566  if (!ResultReg)
2567  return false;
2568 
2569  updateValueMap(I, ResultReg);
2570  return true;
2571 }
2572 
2573 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2574  return Len <= (Subtarget->is64Bit() ? 32 : 16);
2575 }
2576 
2577 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
2578  X86AddressMode SrcAM, uint64_t Len) {
2579 
2580  // Make sure we don't bloat code by inlining very large memcpy's.
2581  if (!IsMemcpySmall(Len))
2582  return false;
2583 
2584  bool i64Legal = Subtarget->is64Bit();
2585 
2586  // We don't care about alignment here since we just emit integer accesses.
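 // Illustrative example: a 13-byte copy on x86-64 is emitted as one i64,
 // one i32, and one i8 load/store pair (8 + 4 + 1 bytes).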
2587  while (Len) {
2588  MVT VT;
2589  if (Len >= 8 && i64Legal)
2590  VT = MVT::i64;
2591  else if (Len >= 4)
2592  VT = MVT::i32;
2593  else if (Len >= 2)
2594  VT = MVT::i16;
2595  else
2596  VT = MVT::i8;
2597 
2598  unsigned Reg;
2599  bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2600  RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
2601  assert(RV && "Failed to emit load or store??");
2602 
2603  unsigned Size = VT.getSizeInBits()/8;
2604  Len -= Size;
2605  DestAM.Disp += Size;
2606  SrcAM.Disp += Size;
2607  }
2608 
2609  return true;
2610 }
2611 
2612 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2613  // FIXME: Handle more intrinsics.
2614  switch (II->getIntrinsicID()) {
2615  default: return false;
2616  case Intrinsic::convert_from_fp16:
2617  case Intrinsic::convert_to_fp16: {
2618  if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
2619  return false;
2620 
2621  const Value *Op = II->getArgOperand(0);
2622  unsigned InputReg = getRegForValue(Op);
2623  if (InputReg == 0)
2624  return false;
2625 
2626  // F16C only allows converting from float to half and from half to float.
2627  bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
2628  if (IsFloatToHalf) {
2629  if (!Op->getType()->isFloatTy())
2630  return false;
2631  } else {
2632  if (!II->getType()->isFloatTy())
2633  return false;
2634  }
2635 
2636  unsigned ResultReg = 0;
2637  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
2638  if (IsFloatToHalf) {
2639  // 'InputReg' is implicitly promoted from register class FR32 to
2640  // register class VR128 by method 'constrainOperandRegClass' which is
2641  // directly called by 'fastEmitInst_ri'.
2642  // Instruction VCVTPS2PHrr takes an extra immediate operand which is
2643  // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
2644  // It's consistent with the other FP instructions, which are usually
2645  // controlled by MXCSR.
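 // Illustrative sketch of the emitted sequence (registers arbitrary):
 //   vcvtps2ph $4, %xmm0, %xmm0   ; round according to MXCSR.RC
 //   vmovd     %xmm0, %eax        ; lower 32 bits into a GPR
 //   ; the half value is the low 16 bits of %eax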
2646  InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4);
2647 
2648  // Move the lower 32 bits of the converted value into a GR32 register.
2649  ResultReg = createResultReg(&X86::GR32RegClass);
2650  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2651  TII.get(X86::VMOVPDI2DIrr), ResultReg)
2652  .addReg(InputReg, RegState::Kill);
2653 
2654  // The result value is in the lower 16-bits of ResultReg.
2655  unsigned RegIdx = X86::sub_16bit;
2656  ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
2657  } else {
2658  assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
2659  // Explicitly sign-extend the input to 32-bit.
2660  InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
2661  /*Kill=*/false);
2662 
2663  // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
2664  InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
2665  InputReg, /*Kill=*/true);
2666 
2667  InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
2668 
2669  // The result value is in the lower 32-bits of ResultReg.
2670  // Emit an explicit copy from register class VR128 to register class FR32.
2671  ResultReg = createResultReg(&X86::FR32RegClass);
2672  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2673  TII.get(TargetOpcode::COPY), ResultReg)
2674  .addReg(InputReg, RegState::Kill);
2675  }
2676 
2677  updateValueMap(II, ResultReg);
2678  return true;
2679  }
2680  case Intrinsic::frameaddress: {
2681  MachineFunction *MF = FuncInfo.MF;
2682  if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
2683  return false;
2684 
2685  Type *RetTy = II->getCalledFunction()->getReturnType();
2686 
2687  MVT VT;
2688  if (!isTypeLegal(RetTy, VT))
2689  return false;
2690 
2691  unsigned Opc;
2692  const TargetRegisterClass *RC = nullptr;
2693 
2694  switch (VT.SimpleTy) {
2695  default: llvm_unreachable("Invalid result type for frameaddress.");
2696  case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2697  case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2698  }
2699 
2700  // This needs to be set before we call getPtrSizedFrameRegister, otherwise
2701  // we get the wrong frame register.
2702  MachineFrameInfo &MFI = MF->getFrameInfo();
2703  MFI.setFrameAddressIsTaken(true);
2704 
2705  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2706  unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
2707  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
2708  (FrameReg == X86::EBP && VT == MVT::i32)) &&
2709  "Invalid Frame Register!");
2710 
2711  // Always make a copy of the frame register to a vreg first, so that we
2712  // never directly reference the frame register (the TwoAddressInstruction-
2713  // Pass doesn't like that).
2714  unsigned SrcReg = createResultReg(RC);
2715  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2716  TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2717 
2718  // Now recursively load from the frame address.
2719  // movq (%rbp), %rax
2720  // movq (%rax), %rax
2721  // movq (%rax), %rax
2722  // ...
2723  unsigned DestReg;
2724  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2725  while (Depth--) {
2726  DestReg = createResultReg(RC);
2727  addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2728  TII.get(Opc), DestReg), SrcReg);
2729  SrcReg = DestReg;
2730  }
2731 
2732  updateValueMap(II, SrcReg);
2733  return true;
2734  }
2735  case Intrinsic::memcpy: {
2736  const MemCpyInst *MCI = cast<MemCpyInst>(II);
2737  // Don't handle volatile or variable length memcpys.
2738  if (MCI->isVolatile())
2739  return false;
2740 
2741  if (isa<ConstantInt>(MCI->getLength())) {
2742  // Small memcpy's are common enough that we want to do them
2743  // without a call if possible.
2744  uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
2745  if (IsMemcpySmall(Len)) {
2746  X86AddressMode DestAM, SrcAM;
2747  if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
2748  !X86SelectAddress(MCI->getRawSource(), SrcAM))
2749  return false;
2750  TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2751  return true;
2752  }
2753  }
2754 
2755  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2756  if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
2757  return false;
2758 
2759  if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
2760  return false;
2761 
2762  return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1);
2763  }
2764  case Intrinsic::memset: {
2765  const MemSetInst *MSI = cast<MemSetInst>(II);
2766 
2767  if (MSI->isVolatile())
2768  return false;
2769 
2770  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2771  if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
2772  return false;
2773 
2774  if (MSI->getDestAddressSpace() > 255)
2775  return false;
2776 
2777  return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
2778  }
2779  case Intrinsic::stackprotector: {
2780  // Emit code to store the stack guard onto the stack.
2781  EVT PtrTy = TLI.getPointerTy(DL);
2782 
2783  const Value *Op1 = II->getArgOperand(0); // The guard's value.
2784  const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
2785 
2786  MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
2787 
2788  // Grab the frame index.
2789  X86AddressMode AM;
2790  if (!X86SelectAddress(Slot, AM)) return false;
2791  if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2792  return true;
2793  }
2794  case Intrinsic::dbg_declare: {
2795  const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
2796  X86AddressMode AM;
2797  assert(DI->getAddress() && "Null address should be checked earlier!");
2798  if (!X86SelectAddress(DI->getAddress(), AM))
2799  return false;
2800  const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
2801  // FIXME may need to add RegState::Debug to any registers produced,
2802  // although ESP/EBP should be the only ones at the moment.
2803  assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
2804         "Expected inlined-at fields to agree");
2805  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
2806  .addImm(0)
2807  .addMetadata(DI->getVariable())
2808  .addMetadata(DI->getExpression());
2809  return true;
2810  }
2811  case Intrinsic::trap: {
2812  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
2813  return true;
2814  }
2815  case Intrinsic::sqrt: {
2816  if (!Subtarget->hasSSE1())
2817  return false;
2818 
2819  Type *RetTy = II->getCalledFunction()->getReturnType();
2820 
2821  MVT VT;
2822  if (!isTypeLegal(RetTy, VT))
2823  return false;
2824 
2825  // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
2826  // is not generated by FastISel yet.
2827  // FIXME: Update this code once tablegen can handle it.
2828  static const uint16_t SqrtOpc[3][2] = {
2829  { X86::SQRTSSr, X86::SQRTSDr },
2830  { X86::VSQRTSSr, X86::VSQRTSDr },
2831  { X86::VSQRTSSZr, X86::VSQRTSDZr },
2832  };
2833  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
2834  Subtarget->hasAVX() ? 1 :
2835  0;
2836  unsigned Opc;
2837  switch (VT.SimpleTy) {
2838  default: return false;
2839  case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
2840  case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
2841  }
2842 
2843  const Value *SrcVal = II->getArgOperand(0);
2844  unsigned SrcReg = getRegForValue(SrcVal);
2845 
2846  if (SrcReg == 0)
2847  return false;
2848 
2849  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
2850  unsigned ImplicitDefReg = 0;
2851  if (AVXLevel > 0) {
2852  ImplicitDefReg = createResultReg(RC);
2853  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2854  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2855  }
2856 
2857  unsigned ResultReg = createResultReg(RC);
2858  MachineInstrBuilder MIB;
2859  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2860  ResultReg);
2861 
2862  if (ImplicitDefReg)
2863  MIB.addReg(ImplicitDefReg);
2864 
2865  MIB.addReg(SrcReg);
2866 
2867  updateValueMap(II, ResultReg);
2868  return true;
2869  }
2870  case Intrinsic::sadd_with_overflow:
2871  case Intrinsic::uadd_with_overflow:
2872  case Intrinsic::ssub_with_overflow:
2873  case Intrinsic::usub_with_overflow:
2874  case Intrinsic::smul_with_overflow:
2875  case Intrinsic::umul_with_overflow: {
2876  // This implements the basic lowering of the xalu with overflow intrinsics
2877  // into add/sub/mul followed by either seto or setb.
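 // Illustrative sketch for i32 uadd.with.overflow (registers arbitrary):
 //   addl %esi, %edi       ; value result
 //   setb %al              ; overflow result (carry set on unsigned wrap)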
2878  const Function *Callee = II->getCalledFunction();
2879  auto *Ty = cast<StructType>(Callee->getReturnType());
2880  Type *RetTy = Ty->getTypeAtIndex(0U);
2881  assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
2882  Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
2883  "Overflow value expected to be an i1");
2884 
2885  MVT VT;
2886  if (!isTypeLegal(RetTy, VT))
2887  return false;
2888 
2889  if (VT < MVT::i8 || VT > MVT::i64)
2890  return false;
2891 
2892  const Value *LHS = II->getArgOperand(0);
2893  const Value *RHS = II->getArgOperand(1);
2894 
2895  // Canonicalize immediate to the RHS.
2896  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2897  isCommutativeIntrinsic(II))
2898  std::swap(LHS, RHS);
2899 
2900  unsigned BaseOpc, CondOpc;
2901  switch (II->getIntrinsicID()) {
2902  default: llvm_unreachable("Unexpected intrinsic!");
2903  case Intrinsic::sadd_with_overflow:
2904  BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
2905  case Intrinsic::uadd_with_overflow:
2906  BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
2907  case Intrinsic::ssub_with_overflow:
2908  BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
2909  case Intrinsic::usub_with_overflow:
2910  BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
2911  case Intrinsic::smul_with_overflow:
2912  BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
2913  case Intrinsic::umul_with_overflow:
2914  BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
2915  }
2916 
2917  unsigned LHSReg = getRegForValue(LHS);
2918  if (LHSReg == 0)
2919  return false;
2920  bool LHSIsKill = hasTrivialKill(LHS);
2921 
2922  unsigned ResultReg = 0;
2923  // Check if we have an immediate version.
2924  if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
2925  static const uint16_t Opc[2][4] = {
2926  { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
2927  { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
2928  };
2929 
2930  if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
2931  CondOpc == X86::SETOr) {
2932  // We can use INC/DEC.
2933  ResultReg = createResultReg(TLI.getRegClassFor(VT));
2934  bool IsDec = BaseOpc == ISD::SUB;
2935  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2936  TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
2937  .addReg(LHSReg, getKillRegState(LHSIsKill));
2938  } else
2939  ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
2940  CI->getZExtValue());
2941  }
2942 
2943  unsigned RHSReg;
2944  bool RHSIsKill;
2945  if (!ResultReg) {
2946  RHSReg = getRegForValue(RHS);
2947  if (RHSReg == 0)
2948  return false;
2949  RHSIsKill = hasTrivialKill(RHS);
2950  ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
2951  RHSIsKill);
2952  }
2953 
2954  // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
2955  // it manually.
2956  if (BaseOpc == X86ISD::UMUL && !ResultReg) {
2957  static const uint16_t MULOpc[] =
2958  { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2959  static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
2960  // First copy the first operand into RAX, which is an implicit input to
2961  // the X86::MUL*r instruction.
2962  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2963  TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2964  .addReg(LHSReg, getKillRegState(LHSIsKill));
2965  ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2966  TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
2967  } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
2968  static const uint16_t MULOpc[] =
2969  { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2970  if (VT == MVT::i8) {
2971  // Copy the first operand into AL, which is an implicit input to the
2972  // X86::IMUL8r instruction.
2973  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2974  TII.get(TargetOpcode::COPY), X86::AL)
2975  .addReg(LHSReg, getKillRegState(LHSIsKill));
2976  ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
2977  RHSIsKill);
2978  } else
2979  ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2980  TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
2981  RHSReg, RHSIsKill);
2982  }
2983 
2984  if (!ResultReg)
2985  return false;
2986 
2987  // Assign to a GPR since the overflow return value is lowered to a SETcc.
2988  unsigned ResultReg2 = createResultReg(&X86::GR8RegClass);
2989  assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2990  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
2991  ResultReg2);
2992 
2993  updateValueMap(II, ResultReg, 2);
2994  return true;
2995  }
2996  case Intrinsic::x86_sse_cvttss2si:
2997  case Intrinsic::x86_sse_cvttss2si64:
2998  case Intrinsic::x86_sse2_cvttsd2si:
2999  case Intrinsic::x86_sse2_cvttsd2si64: {
3000  bool IsInputDouble;
3001  switch (II->getIntrinsicID()) {
3002  default: llvm_unreachable("Unexpected intrinsic.");
3003  case Intrinsic::x86_sse_cvttss2si:
3004  case Intrinsic::x86_sse_cvttss2si64:
3005  if (!Subtarget->hasSSE1())
3006  return false;
3007  IsInputDouble = false;
3008  break;
3009  case Intrinsic::x86_sse2_cvttsd2si:
3010  case Intrinsic::x86_sse2_cvttsd2si64:
3011  if (!Subtarget->hasSSE2())
3012  return false;
3013  IsInputDouble = true;
3014  break;
3015  }
3016 
3017  Type *RetTy = II->getCalledFunction()->getReturnType();
3018  MVT VT;
3019  if (!isTypeLegal(RetTy, VT))
3020  return false;
3021 
3022  static const uint16_t CvtOpc[3][2][2] = {
3023  { { X86::CVTTSS2SIrr, X86::CVTTSS2SI64rr },
3024  { X86::CVTTSD2SIrr, X86::CVTTSD2SI64rr } },
3025  { { X86::VCVTTSS2SIrr, X86::VCVTTSS2SI64rr },
3026  { X86::VCVTTSD2SIrr, X86::VCVTTSD2SI64rr } },
3027  { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
3028  { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
3029  };
3030  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
3031  Subtarget->hasAVX() ? 1 :
3032  0;
3033  unsigned Opc;
3034  switch (VT.SimpleTy) {
3035  default: llvm_unreachable("Unexpected result type.");
3036  case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
3037  case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
3038  }
3039 
3040  // Check if we can fold insertelement instructions into the convert.
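 // Illustrative example: for
 //   %v = insertelement <4 x float> undef, float %x, i32 0
 //   %r = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %v)
 // we can convert %x directly instead of materializing the vector %v.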
3041  const Value *Op = II->getArgOperand(0);
3042  while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
3043  const Value *Index = IE->getOperand(2);
3044  if (!isa<ConstantInt>(Index))
3045  break;
3046  unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
3047 
3048  if (Idx == 0) {
3049  Op = IE->getOperand(1);
3050  break;
3051  }
3052  Op = IE->getOperand(0);
3053  }
3054 
3055  unsigned Reg = getRegForValue(Op);
3056  if (Reg == 0)
3057  return false;
3058 
3059  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3060  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3061  .addReg(Reg);
3062 
3063  updateValueMap(II, ResultReg);
3064  return true;
3065  }
3066  }
3067 }
3068 
3069 bool X86FastISel::fastLowerArguments() {
3070  if (!FuncInfo.CanLowerReturn)
3071  return false;
3072 
3073  const Function *F = FuncInfo.Fn;
3074  if (F->isVarArg())
3075  return false;
3076 
3077  CallingConv::ID CC = F->getCallingConv();
3078  if (CC != CallingConv::C)
3079  return false;
3080 
3081  if (Subtarget->isCallingConvWin64(CC))
3082  return false;
3083 
3084  if (!Subtarget->is64Bit())
3085  return false;
3086 
3087  if (Subtarget->useSoftFloat())
3088  return false;
3089 
3090  // Only handle simple cases, i.e. up to 6 i32/i64 scalar arguments.
3091  unsigned GPRCnt = 0;
3092  unsigned FPRCnt = 0;
3093  for (auto const &Arg : F->args()) {
3094  if (Arg.hasAttribute(Attribute::ByVal) ||
3095  Arg.hasAttribute(Attribute::InReg) ||
3096  Arg.hasAttribute(Attribute::StructRet) ||
3097  Arg.hasAttribute(Attribute::SwiftSelf) ||
3098  Arg.hasAttribute(Attribute::SwiftError) ||
3099  Arg.hasAttribute(Attribute::Nest))
3100  return false;
3101 
3102  Type *ArgTy = Arg.getType();
3103  if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3104  return false;
3105 
3106  EVT ArgVT = TLI.getValueType(DL, ArgTy);
3107  if (!ArgVT.isSimple()) return false;
3108  switch (ArgVT.getSimpleVT().SimpleTy) {
3109  default: return false;
3110  case MVT::i32:
3111  case MVT::i64:
3112  ++GPRCnt;
3113  break;
3114  case MVT::f32:
3115  case MVT::f64:
3116  if (!Subtarget->hasSSE1())
3117  return false;
3118  ++FPRCnt;
3119  break;
3120  }
3121 
3122  if (GPRCnt > 6)
3123  return false;
3124 
3125  if (FPRCnt > 8)
3126  return false;
3127  }
3128 
3129  static const MCPhysReg GPR32ArgRegs[] = {
3130  X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
3131  };
3132  static const MCPhysReg GPR64ArgRegs[] = {
3133  X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
3134  };
3135  static const MCPhysReg XMMArgRegs[] = {
3136  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3137  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3138  };
3139 
3140  unsigned GPRIdx = 0;
3141  unsigned FPRIdx = 0;
3142  for (auto const &Arg : F->args()) {
3143  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3144  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
3145  unsigned SrcReg;
3146  switch (VT.SimpleTy) {
3147  default: llvm_unreachable("Unexpected value type.");
3148  case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
3149  case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
3150  case MVT::f32: LLVM_FALLTHROUGH;
3151  case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
3152  }
3153  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3154  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3155  // Without this, EmitLiveInCopies may eliminate the livein if its only
3156  // use is a bitcast (which isn't turned into an instruction).
3157  unsigned ResultReg = createResultReg(RC);
3158  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3159  TII.get(TargetOpcode::COPY), ResultReg)
3160  .addReg(DstReg, getKillRegState(true));
3161  updateValueMap(&Arg, ResultReg);
3162  }
3163  return true;
3164 }
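// Illustrative example (editorial note, not part of the upstream source): a
// function such as
//   define i64 @f(i64 %a, i32 %b, double %c)
// qualifies for this fast path: %a is taken from RDI, %b from ESI and %c from
// XMM0, per the SysV x86-64 register tables above, while a seventh integer
// argument or a ninth FP argument would force a fallback to SelectionDAG.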
3165 
3166 static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
3167  CallingConv::ID CC,
3168  ImmutableCallSite *CS) {
3169  if (Subtarget->is64Bit())
3170  return 0;
3171  if (Subtarget->getTargetTriple().isOSMSVCRT())
3172  return 0;
3173  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3174  CC == CallingConv::HiPE)
3175  return 0;
3176 
3177  if (CS)
3178  if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
3179  CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
3180  return 0;
3181 
3182  return 4;
3183 }
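// Illustrative note (editorial, not from the upstream source): on 32-bit
// non-MSVCRT targets, a call whose first argument is a hidden sret pointer,
// e.g.
//   call void @g(%struct.S* sret %tmp)
// has that 4-byte pointer popped by the callee, and fastLowerCall below feeds
// the value returned here into the CALLSEQ_END byte count.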
3184 
3185 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3186  auto &OutVals = CLI.OutVals;
3187  auto &OutFlags = CLI.OutFlags;
3188  auto &OutRegs = CLI.OutRegs;
3189  auto &Ins = CLI.Ins;
3190  auto &InRegs = CLI.InRegs;
3191  CallingConv::ID CC = CLI.CallConv;
3192  bool &IsTailCall = CLI.IsTailCall;
3193  bool IsVarArg = CLI.IsVarArg;
3194  const Value *Callee = CLI.Callee;
3195  MCSymbol *Symbol = CLI.Symbol;
3196 
3197  bool Is64Bit = Subtarget->is64Bit();
3198  bool IsWin64 = Subtarget->isCallingConvWin64(CC);
3199 
3200  const CallInst *CI =
3201  CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
3202  const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr;
3203 
3204  // Call / invoke instructions with NoCfCheck attribute require special
3205  // handling.
3206  const auto *II =
3207  CLI.CS ? dyn_cast<InvokeInst>(CLI.CS->getInstruction()) : nullptr;
3208  if ((CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck()))
3209  return false;
3210 
3211  // Functions with no_caller_saved_registers need special handling.
3212  if ((CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3213  (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
3214  return false;
3215 
3216  // Functions using retpoline for indirect calls need to use SDISel.
3217  if (Subtarget->useRetpolineIndirectCalls())
3218  return false;
3219 
3220  // Handle only C, fastcc, webkit_js, Swift and the x86/Win64-specific calling conventions below for now.
3221  switch (CC) {
3222  default: return false;
3223  case CallingConv::C:
3224  case CallingConv::Fast:
3225  case CallingConv::WebKit_JS:
3226  case CallingConv::Swift:
3227  case CallingConv::X86_FastCall:
3228  case CallingConv::X86_StdCall:
3229  case CallingConv::X86_ThisCall:
3230  case CallingConv::Win64:
3231  case CallingConv::X86_64_SysV:
3232  break;
3233  }
3234 
3235  // Allow SelectionDAG isel to handle tail calls.
3236  if (IsTailCall)
3237  return false;
3238 
3239  // fastcc with -tailcallopt is intended to provide a guaranteed
3240  // tail call optimization. FastISel doesn't know how to do that.
3241  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
3242  return false;
3243 
3244  // Don't know how to handle Win64 varargs yet. Nothing special needed for
3245  // x86-32. Special handling for x86-64 is implemented.
3246  if (IsVarArg && IsWin64)
3247  return false;
3248 
3249  // Don't know about inalloca yet.
3250  if (CLI.CS && CLI.CS->hasInAllocaArgument())
3251  return false;
3252 
3253  for (auto Flag : CLI.OutFlags)
3254  if (Flag.isSwiftError())
3255  return false;
3256 
3257  SmallVector<MVT, 16> OutVTs;
3258  SmallVector<unsigned, 16> ArgRegs;
3259 
3260  // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
3261  // instruction. This is safe because it is common to all FastISel supported
3262  // calling conventions on x86.
3263  for (int i = 0, e = OutVals.size(); i != e; ++i) {
3264  Value *&Val = OutVals[i];
3265  ISD::ArgFlagsTy Flags = OutFlags[i];
3266  if (auto *CI = dyn_cast<ConstantInt>(Val)) {
3267  if (CI->getBitWidth() < 32) {
3268  if (Flags.isSExt())
3269  Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
3270  else
3271  Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
3272  }
3273  }
3274 
3275  // Passing bools around ends up doing a trunc to i1 and passing it.
3276  // Codegen this as an argument + "and 1".
3277  MVT VT;
3278  auto *TI = dyn_cast<TruncInst>(Val);
3279  unsigned ResultReg;
3280  if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
3281  (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
3282  TI->hasOneUse()) {
3283  Value *PrevVal = TI->getOperand(0);
3284  ResultReg = getRegForValue(PrevVal);
3285 
3286  if (!ResultReg)
3287  return false;
3288 
3289  if (!isTypeLegal(PrevVal->getType(), VT))
3290  return false;
3291 
3292  ResultReg =
3293  fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
3294  } else {
3295  if (!isTypeLegal(Val->getType(), VT))
3296  return false;
3297  ResultReg = getRegForValue(Val);
3298  }
3299 
3300  if (!ResultReg)
3301  return false;
3302 
3303  ArgRegs.push_back(ResultReg);
3304  OutVTs.push_back(VT);
3305  }
3306 
3307  // Analyze operands of the call, assigning locations to each operand.
3308  SmallVector<CCValAssign, 16> ArgLocs;
3309  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
3310 
3311  // Allocate shadow area for Win64
3312  if (IsWin64)
3313  CCInfo.AllocateStack(32, 8);
3314 
3315  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
3316 
3317  // Get a count of how many bytes are to be pushed on the stack.
3318  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3319 
3320  // Issue CALLSEQ_START
3321  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3322  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3323  .addImm(NumBytes).addImm(0).addImm(0);
3324 
3325  // Walk the register/memloc assignments, inserting copies/loads.
3326  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3327  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3328  CCValAssign const &VA = ArgLocs[i];
3329  const Value *ArgVal = OutVals[VA.getValNo()];
3330  MVT ArgVT = OutVTs[VA.getValNo()];
3331 
3332  if (ArgVT == MVT::x86mmx)
3333  return false;
3334 
3335  unsigned ArgReg = ArgRegs[VA.getValNo()];
3336 
3337  // Promote the value if needed.
3338  switch (VA.getLocInfo()) {
3339  case CCValAssign::Full: break;
3340  case CCValAssign::SExt: {
3341  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3342  "Unexpected extend");
3343 
3344  if (ArgVT == MVT::i1)
3345  return false;
3346 
3347  bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3348  ArgVT, ArgReg);
3349  assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
3350  ArgVT = VA.getLocVT();
3351  break;
3352  }
3353  case CCValAssign::ZExt: {
3354  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3355  "Unexpected extend");
3356 
3357  // Handle zero-extension from i1 to i8, which is common.
3358  if (ArgVT == MVT::i1) {
3359  // Set the high bits to zero.
3360  ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
3361  ArgVT = MVT::i8;
3362 
3363  if (ArgReg == 0)
3364  return false;
3365  }
3366 
3367  bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3368  ArgVT, ArgReg);
3369  assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
3370  ArgVT = VA.getLocVT();
3371  break;
3372  }
3373  case CCValAssign::AExt: {
3374  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3375  "Unexpected extend");
3376  bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
3377  ArgVT, ArgReg);
3378  if (!Emitted)
3379  Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3380  ArgVT, ArgReg);
3381  if (!Emitted)
3382  Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3383  ArgVT, ArgReg);
3384 
3385  assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
3386  ArgVT = VA.getLocVT();
3387  break;
3388  }
3389  case CCValAssign::BCvt: {
3390  ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
3391  /*TODO: Kill=*/false);
3392  assert(ArgReg && "Failed to emit a bitcast!");
3393  ArgVT = VA.getLocVT();
3394  break;
3395  }
3396  case CCValAssign::VExt:
3397  // VExt has not been implemented, so this should be impossible to reach
3398  // for now. However, fall back to SelectionDAG isel once it is implemented.
3399  return false;
3400  case CCValAssign::AExtUpper:
3401  case CCValAssign::SExtUpper:
3402  case CCValAssign::ZExtUpper:
3403  case CCValAssign::FPExt:
3404  llvm_unreachable("Unexpected loc info!");
3405  case CCValAssign::Indirect:
3406  // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
3407  // support this.
3408  return false;
3409  }
3410 
3411  if (VA.isRegLoc()) {
3412  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3413  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3414  OutRegs.push_back(VA.getLocReg());
3415  } else {
3416  assert(VA.isMemLoc());
3417 
3418  // Don't emit stores for undef values.
3419  if (isa<UndefValue>(ArgVal))
3420  continue;
3421 
3422  unsigned LocMemOffset = VA.getLocMemOffset();
3423  X86AddressMode AM;
3424  AM.Base.Reg = RegInfo->getStackRegister();
3425  AM.Disp = LocMemOffset;
3426  ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
3427  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3428  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3429  MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
3430  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3431  if (Flags.isByVal()) {
3432  X86AddressMode SrcAM;
3433  SrcAM.Base.Reg = ArgReg;
3434  if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
3435  return false;
3436  } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
3437  // If this is a really simple value, emit this with the Value* version
3438  // of X86FastEmitStore. If it isn't simple, we don't want to do this,
3439  // as it can cause us to reevaluate the argument.
3440  if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
3441  return false;
3442  } else {
3443  bool ValIsKill = hasTrivialKill(ArgVal);
3444  if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
3445  return false;
3446  }
3447  }
3448  }
3449 
3450  // ELF / PIC requires the GOT pointer to be in the EBX register before
3451  // function calls made via the PLT.
3452  if (Subtarget->isPICStyleGOT()) {
3453  unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3454  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3455  TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
3456  }
3457 
3458  if (Is64Bit && IsVarArg && !IsWin64) {
3459  // From AMD64 ABI document:
3460  // For calls that may call functions that use varargs or stdargs
3461  // (prototype-less calls or calls to functions containing ellipsis (...) in
3462  // the declaration) %al is used as hidden argument to specify the number
3463  // of SSE registers used. The contents of %al do not need to match exactly
3464  // the number of registers, but must be an upper bound on the number of SSE
3465  // registers used and is in the range 0 - 8 inclusive.
3466 
3467  // Count the number of XMM registers allocated.
3468  static const MCPhysReg XMMArgRegs[] = {
3469  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3470  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3471  };
3472  unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3473  assert((Subtarget->hasSSE1() || !NumXMMRegs)
3474  && "SSE registers cannot be used when SSE is disabled");
3475  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
3476  X86::AL).addImm(NumXMMRegs);
3477  }
3478 
3479  // Materialize callee address in a register. FIXME: GV address can be
3480  // handled with a CALLpcrel32 instead.
3481  X86AddressMode CalleeAM;
3482  if (!X86SelectCallAddress(Callee, CalleeAM))
3483  return false;
3484 
3485  unsigned CalleeOp = 0;
3486  const GlobalValue *GV = nullptr;
3487  if (CalleeAM.GV != nullptr) {
3488  GV = CalleeAM.GV;
3489  } else if (CalleeAM.Base.Reg != 0) {
3490  CalleeOp = CalleeAM.Base.Reg;
3491  } else
3492  return false;
3493 
3494  // Issue the call.
3495  MachineInstrBuilder MIB;
3496  if (CalleeOp) {
3497  // Register-indirect call.
3498  unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
3499  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
3500  .addReg(CalleeOp);
3501  } else {
3502  // Direct call.
3503  assert(GV && "Not a direct call");
3504  // See if we need any target-specific flags on the GV operand.
3505  unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
3506 
3507  // This will be a direct call, or an indirect call through memory for
3508  // NonLazyBind calls or dllimport calls.
3509  bool NeedLoad =
3510  OpFlags == X86II::MO_DLLIMPORT || OpFlags == X86II::MO_GOTPCREL;
3511  unsigned CallOpc = NeedLoad
3512  ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
3513  : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
3514 
3515  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
3516  if (NeedLoad)
3517  MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
3518  if (Symbol)
3519  MIB.addSym(Symbol, OpFlags);
3520  else
3521  MIB.addGlobalAddress(GV, 0, OpFlags);
3522  if (NeedLoad)
3523  MIB.addReg(0);
3524  }
3525 
3526  // Add a register mask operand representing the call-preserved registers.
3527  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3528  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3529 
3530  // Add an implicit use GOT pointer in EBX.
3531  if (Subtarget->isPICStyleGOT())
3532  MIB.addReg(X86::EBX, RegState::Implicit);
3533 
3534  if (Is64Bit && IsVarArg && !IsWin64)
3535  MIB.addReg(X86::AL, RegState::Implicit);
3536 
3537  // Add implicit physical register uses to the call.
3538  for (auto Reg : OutRegs)
3539  MIB.addReg(Reg, RegState::Implicit);
3540 
3541  // Issue CALLSEQ_END
3542  unsigned NumBytesForCalleeToPop =
3543  X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
3544  TM.Options.GuaranteedTailCallOpt)
3545  ? NumBytes // Callee pops everything.
3546  : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CS);
3547  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3548  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3549  .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
3550 
3551  // Now handle call return values.
3552  SmallVector<CCValAssign, 16> RVLocs;
3553  CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
3554  CLI.RetTy->getContext());
3555  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
3556 
3557  // Copy all of the result registers out of their specified physreg.
3558  unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3559  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3560  CCValAssign &VA = RVLocs[i];
3561  EVT CopyVT = VA.getValVT();
3562  unsigned CopyReg = ResultReg + i;
3563  unsigned SrcReg = VA.getLocReg();
3564 
3565  // If this is x86-64, and we disabled SSE, we can't return FP values
3566  if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
3567  ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
3568  report_fatal_error("SSE register return with SSE disabled");
3569  }
3570 
3571  // If we prefer to use the value in xmm registers, copy it out as f80 and
3572  // use a truncate to move it from fp stack reg to xmm reg.
3573  if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
3574  isScalarFPTypeInSSEReg(VA.getValVT())) {
3575  CopyVT = MVT::f80;
3576  CopyReg = createResultReg(&X86::RFP80RegClass);
3577  }
3578 
3579  // Copy out the result.
3580  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3581  TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
3582  InRegs.push_back(VA.getLocReg());
3583 
3584  // Round the f80 to the right size, which also moves it to the appropriate
3585  // xmm register. This is accomplished by storing the f80 value in memory
3586  // and then loading it back.
3587  if (CopyVT != VA.getValVT()) {
3588  EVT ResVT = VA.getValVT();
3589  unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3590  unsigned MemSize = ResVT.getSizeInBits()/8;
3591  int FI = MFI.CreateStackObject(MemSize, MemSize, false);
3592  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3593  TII.get(Opc)), FI)
3594  .addReg(CopyReg);
3595  Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
3596  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3597  TII.get(Opc), ResultReg + i), FI);
3598  }
3599  }
3600 
3601  CLI.ResultReg = ResultReg;
3602  CLI.NumResultRegs = RVLocs.size();
3603  CLI.Call = MIB;
3604 
3605  return true;
3606 }
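// Worked example (editorial, not from the upstream source): for a SysV x86-64
// variadic call such as
//   call i32 (i8*, ...) @printf(i8* %fmt, double %x)
// the code above copies %fmt into RDI and %x into XMM0 and, because the callee
// is variadic, emits "movb $1, %al" so that AL carries an upper bound on the
// number of XMM registers used, as the AMD64 ABI requires.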
3607 
3608 bool
3609 X86FastISel::fastSelectInstruction(const Instruction *I) {
3610  switch (I->getOpcode()) {
3611  default: break;
3612  case Instruction::Load:
3613  return X86SelectLoad(I);
3614  case Instruction::Store:
3615  return X86SelectStore(I);
3616  case Instruction::Ret:
3617  return X86SelectRet(I);
3618  case Instruction::ICmp:
3619  case Instruction::FCmp:
3620  return X86SelectCmp(I);
3621  case Instruction::ZExt:
3622  return X86SelectZExt(I);
3623  case Instruction::SExt:
3624  return X86SelectSExt(I);
3625  case Instruction::Br:
3626  return X86SelectBranch(I);
3627  case Instruction::LShr:
3628  case Instruction::AShr:
3629  case Instruction::Shl:
3630  return X86SelectShift(I);
3631  case Instruction::SDiv:
3632  case Instruction::UDiv:
3633  case Instruction::SRem:
3634  case Instruction::URem:
3635  return X86SelectDivRem(I);
3636  case Instruction::Select:
3637  return X86SelectSelect(I);
3638  case Instruction::Trunc:
3639  return X86SelectTrunc(I);
3640  case Instruction::FPExt:
3641  return X86SelectFPExt(I);
3642  case Instruction::FPTrunc:
3643  return X86SelectFPTrunc(I);
3644  case Instruction::SIToFP:
3645  return X86SelectSIToFP(I);
3646  case Instruction::UIToFP:
3647  return X86SelectUIToFP(I);
3648  case Instruction::IntToPtr: // Deliberate fall-through.
3649  case Instruction::PtrToInt: {
3650  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3651  EVT DstVT = TLI.getValueType(DL, I->getType());
3652  if (DstVT.bitsGT(SrcVT))
3653  return X86SelectZExt(I);
3654  if (DstVT.bitsLT(SrcVT))
3655  return X86SelectTrunc(I);
3656  unsigned Reg = getRegForValue(I->getOperand(0));
3657  if (Reg == 0) return false;
3658  updateValueMap(I, Reg);
3659  return true;
3660  }
3661  case Instruction::BitCast: {
3662  // Select SSE2/AVX bitcasts between 128/256 bit vector types.
3663  if (!Subtarget->hasSSE2())
3664  return false;
3665 
3666  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3667  EVT DstVT = TLI.getValueType(DL, I->getType());
3668 
3669  if (!SrcVT.isSimple() || !DstVT.isSimple())
3670  return false;
3671 
3672  MVT SVT = SrcVT.getSimpleVT();
3673  MVT DVT = DstVT.getSimpleVT();
3674 
3675  if (!SVT.is128BitVector() &&
3676  !(Subtarget->hasAVX() && SVT.is256BitVector()) &&
3677  !(Subtarget->hasAVX512() && SVT.is512BitVector() &&
3678  (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 &&
3679  DVT.getScalarSizeInBits() >= 32))))
3680  return false;
3681 
3682  unsigned Reg = getRegForValue(I->getOperand(0));
3683  if (Reg == 0)
3684  return false;
3685 
3686  // No instruction is needed for conversion. Reuse the register used by
3687  // the first operand.
3688  updateValueMap(I, Reg);
3689  return true;
3690  }
3691  }
3692 
3693  return false;
3694 }
3695 
3696 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
3697  if (VT > MVT::i64)
3698  return 0;
3699 
3700  uint64_t Imm = CI->getZExtValue();
3701  if (Imm == 0) {
3702  unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
3703  switch (VT.SimpleTy) {
3704  default: llvm_unreachable("Unexpected value type");
3705  case MVT::i1:
3706  case MVT::i8:
3707  return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
3708  X86::sub_8bit);
3709  case MVT::i16:
3710  return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
3711  X86::sub_16bit);
3712  case MVT::i32:
3713  return SrcReg;
3714  case MVT::i64: {
3715  unsigned ResultReg = createResultReg(&X86::GR64RegClass);
3716  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3717  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3718  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3719  return ResultReg;
3720  }
3721  }
3722  }
3723 
3724  unsigned Opc = 0;
3725  switch (VT.SimpleTy) {
3726  default: llvm_unreachable("Unexpected value type");
3727  case MVT::i1:
3728  VT = MVT::i8;
3729  LLVM_FALLTHROUGH;
3730  case MVT::i8: Opc = X86::MOV8ri; break;
3731  case MVT::i16: Opc = X86::MOV16ri; break;
3732  case MVT::i32: Opc = X86::MOV32ri; break;
3733  case MVT::i64: {
3734  if (isUInt<32>(Imm))
3735  Opc = X86::MOV32ri64;
3736  else if (isInt<32>(Imm))
3737  Opc = X86::MOV64ri32;
3738  else
3739  Opc = X86::MOV64ri;
3740  break;
3741  }
3742  }
3743  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
3744 }
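// Illustrative examples (editorial, not from the upstream source) of the
// opcode choices above: i32 0 is materialized through MOV32r0 (which expands
// to a register xor); an i64 constant that fits isUInt<32>, e.g.
// 0x00000000ffffffff, uses MOV32ri64; one that fits isInt<32>, e.g. -1, uses
// MOV64ri32; anything else needs the full 10-byte MOV64ri encoding.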
3745 
3746 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
3747  if (CFP->isNullValue())
3748  return fastMaterializeFloatZero(CFP);
3749 
3750  // Can't handle alternate code models yet.
3751  CodeModel::Model CM = TM.getCodeModel();
3752  if (CM != CodeModel::Small && CM != CodeModel::Large)
3753  return 0;
3754 
3755  // Get opcode and regclass of the output for the given load instruction.
3756  unsigned Opc = 0;
3757  const TargetRegisterClass *RC = nullptr;
3758  switch (VT.SimpleTy) {
3759  default: return 0;
3760  case MVT::f32:
3761  if (X86ScalarSSEf32) {
3762  Opc = Subtarget->hasAVX512()
3763  ? X86::VMOVSSZrm
3764  : Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
3765  RC = Subtarget->hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3766  } else {
3767  Opc = X86::LD_Fp32m;
3768  RC = &X86::RFP32RegClass;
3769  }
3770  break;
3771  case MVT::f64:
3772  if (X86ScalarSSEf64) {
3773  Opc = Subtarget->hasAVX512()
3774  ? X86::VMOVSDZrm
3775  : Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
3776  RC = Subtarget->hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3777  } else {
3778  Opc = X86::LD_Fp64m;
3779  RC = &X86::RFP64RegClass;
3780  }
3781  break;
3782  case MVT::f80:
3783  // No f80 support yet.
3784  return 0;
3785  }
3786 
3787  // MachineConstantPool wants an explicit alignment.
3788  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
3789  if (Align == 0) {
3790  // Alignment of vector types. FIXME!
3791  Align = DL.getTypeAllocSize(CFP->getType());
3792  }
3793 
3794  // x86-32 PIC requires a PIC base register for constant pools.
3795  unsigned PICBase = 0;
3796  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
3797  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
3798  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3799  else if (OpFlag == X86II::MO_GOTOFF)
3800  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3801  else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
3802  PICBase = X86::RIP;
3803 
3804  // Create the load from the constant pool.
3805  unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
3806  unsigned ResultReg = createResultReg(RC);
3807 
3808  if (CM == CodeModel::Large) {
3809  unsigned AddrReg = createResultReg(&X86::GR64RegClass);
3810  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3811  AddrReg)
3812  .addConstantPoolIndex(CPI, 0, OpFlag);
3813  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3814  TII.get(Opc), ResultReg);
3815  addDirectMem(MIB, AddrReg);
3816  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3817  MachinePointerInfo::getConstantPool(*FuncInfo.MF),
3818  MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
3819  MIB->addMemOperand(*FuncInfo.MF, MMO);
3820  return ResultReg;
3821  }
3822 
3823  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3824  TII.get(Opc), ResultReg),
3825  CPI, PICBase, OpFlag);
3826  return ResultReg;
3827 }
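// Editorial note (illustrative, not from the upstream source): a double
// constant such as 3.14 is therefore emitted as a constant-pool load, e.g.
// "movsd .LCPI0_0(%rip), %xmm0" under the small code model on x86-64, with
// the PIC base register standing in for RIP on 32-bit PIC targets.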
3828 
3829 unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
3830  // Can't handle alternate code models yet.
3831  if (TM.getCodeModel() != CodeModel::Small)
3832  return 0;
3833 
3834  // Materialize addresses with LEA/MOV instructions.
3835  X86AddressMode AM;
3836  if (X86SelectAddress(GV, AM)) {
3837  // If the expression is just a basereg, then we're done, otherwise we need
3838  // to emit an LEA.
3839  if (AM.BaseType == X86AddressMode::RegBase &&
3840  AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
3841  return AM.Base.Reg;
3842 
3843  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3844  if (TM.getRelocationModel() == Reloc::Static &&
3845  TLI.getPointerTy(DL) == MVT::i64) {
3846  // The displacement code could be more than 32 bits away so we need to use
3847  // an instruction with a 64 bit immediate
3848  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3849  ResultReg)
3850  .addGlobalAddress(GV);
3851  } else {
3852  unsigned Opc =
3853  TLI.getPointerTy(DL) == MVT::i32
3854  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3855  : X86::LEA64r;
3856  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3857  TII.get(Opc), ResultReg), AM);
3858  }
3859  return ResultReg;
3860  }
3861  return 0;
3862 }
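// Editorial note (illustrative, not from the upstream source): under the small
// code model a global is usually materialized with a single LEA, e.g.
// "leaq g(%rip), %rax" on x86-64, while the Reloc::Static 64-bit case above
// uses a movabs-style MOV64ri with the full address as immediate.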
3863 
3864 unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
3865  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
3866 
3867  // Only handle simple types.
3868  if (!CEVT.isSimple())
3869  return 0;
3870  MVT VT = CEVT.getSimpleVT();
3871 
3872  if (const auto *CI = dyn_cast<ConstantInt>(C))
3873  return X86MaterializeInt(CI, VT);
3874  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
3875  return X86MaterializeFP(CFP, VT);
3876  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
3877  return X86MaterializeGV(GV, VT);
3878 
3879  return 0;
3880 }
3881 
3882 unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
3883  // Fail on dynamic allocas. At this point, getRegForValue has already
3884  // checked its CSE maps, so if we're here trying to handle a dynamic
3885  // alloca, we're not going to succeed. X86SelectAddress has a
3886  // check for dynamic allocas, because it's called directly from
3887  // various places, but targetMaterializeAlloca also needs a check
3888  // in order to avoid recursion between getRegForValue,
3889  // X86SelectAddress, and targetMaterializeAlloca.
3890  if (!FuncInfo.StaticAllocaMap.count(C))
3891  return 0;
3892  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
3893 
3894  X86AddressMode AM;
3895  if (!X86SelectAddress(C, AM))
3896  return 0;
3897  unsigned Opc =
3898  TLI.getPointerTy(DL) == MVT::i32
3899  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3900  : X86::LEA64r;
3901  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
3902  unsigned ResultReg = createResultReg(RC);
3903  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3904  TII.get(Opc), ResultReg), AM);
3905  return ResultReg;
3906 }
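// Editorial note (illustrative, not from the upstream source): for a static
// alloca this emits an LEA of the corresponding stack slot, e.g.
// "leaq -24(%rbp), %rax", so subsequent users of the alloca's address reuse a
// register instead of recomputing it.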
3907 
3908 unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
3909  MVT VT;
3910  if (!isTypeLegal(CF->getType(), VT))
3911  return 0;
3912 
3913  // Get opcode and regclass for the given zero.
3914  bool HasAVX512 = Subtarget->hasAVX512();
3915  unsigned Opc = 0;
3916  const TargetRegisterClass *RC = nullptr;
3917  switch (VT.SimpleTy) {
3918  default: return 0;
3919  case MVT::f32:
3920  if (X86ScalarSSEf32) {
3921  Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS;
3922  RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
3923  } else {
3924  Opc = X86::LD_Fp032;
3925  RC = &X86::RFP32RegClass;
3926  }
3927  break;
3928  case MVT::f64:
3929  if (X86ScalarSSEf64) {
3930  Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD;
3931  RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
3932  } else {
3933  Opc = X86::LD_Fp064;
3934  RC = &X86::RFP64RegClass;
3935  }
3936  break;
3937  case MVT::f80:
3938  // No f80 support yet.
3939  return 0;
3940  }
3941 
3942  unsigned ResultReg = createResultReg(RC);
3943  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
3944  return ResultReg;
3945 }
3946 
3947 
3948 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
3949  const LoadInst *LI) {
3950  const Value *Ptr = LI->getPointerOperand();
3951  X86AddressMode AM;
3952  if (!X86SelectAddress(Ptr, AM))
3953  return false;
3954 
3955  const X86InstrInfo &XII = (const X86InstrInfo &)TII;
3956 
3957  unsigned Size = DL.getTypeAllocSize(LI->getType());
3958  unsigned Alignment = LI->getAlignment();
3959 
3960  if (Alignment == 0) // Ensure that codegen never sees alignment 0
3961  Alignment = DL.getABITypeAlignment(LI->getType());
3962 
3963  SmallVector<MachineOperand, 8> AddrOps;
3964  AM.getFullAddress(AddrOps);
3965 
3966  MachineInstr *Result = XII.foldMemoryOperandImpl(
3967  *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
3968  /*AllowCommute=*/true);
3969  if (!Result)
3970  return false;
3971 
3972  // The index register could be in the wrong register class. Unfortunately,
3973  // foldMemoryOperandImpl could have commuted the instruction, so it's not enough
3974  // to just look at OpNo + the offset to the index reg. We actually need to
3975  // scan the instruction to find the index reg and see if it's the correct reg
3976  // class.
3977  unsigned OperandNo = 0;
3978  for (MachineInstr::mop_iterator I = Result->operands_begin(),
3979  E = Result->operands_end(); I != E; ++I, ++OperandNo) {
3980  MachineOperand &MO = *I;
3981  if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
3982  continue;
3983  // Found the index reg, now try to rewrite it.
3984  unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
3985  MO.getReg(), OperandNo);
3986  if (IndexReg == MO.getReg())
3987  continue;
3988  MO.setReg(IndexReg);
3989  }
3990 
3991  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
3992  MachineBasicBlock::iterator I(MI);
3993  removeDeadCode(I, std::next(I));
3994  return true;
3995 }
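// Worked example (editorial, not from the upstream source): for IR like
//   %v = load i32, i32* %p
//   %s = add i32 %a, %v
// where %v has no other use, foldMemoryOperandImpl rewrites the register-form
// add into its memory-operand form, e.g. "addl (%rcx), %eax", and the original
// add is then erased by removeDeadCode above.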
3996 
3997 unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
3998  const TargetRegisterClass *RC,
3999  unsigned Op0, bool Op0IsKill,
4000  unsigned Op1, bool Op1IsKill,
4001  unsigned Op2, bool Op2IsKill,
4002  unsigned Op3, bool Op3IsKill) {
4003  const MCInstrDesc &II = TII.get(MachineInstOpcode);
4004 
4005  unsigned ResultReg = createResultReg(RC);
4006  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
4007  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
4008  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
4009  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
4010 
4011  if (II.getNumDefs() >= 1)
4012  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
4013  .addReg(Op0, getKillRegState(Op0IsKill))
4014  .addReg(Op1, getKillRegState(Op1IsKill))
4015  .addReg(Op2, getKillRegState(Op2IsKill))
4016  .addReg(Op3, getKillRegState(Op3IsKill));
4017  else {
4018  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
4019  .addReg(Op0, getKillRegState(Op0IsKill))
4020  .addReg(Op1, getKillRegState(Op1IsKill))
4021  .addReg(Op2, getKillRegState(Op2IsKill))
4022  .addReg(Op3, getKillRegState(Op3IsKill));
4023  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4024  TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
4025  }
4026  return ResultReg;
4027 }
4028 
4029 
4030 namespace llvm {
4031  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
4032  const TargetLibraryInfo *libInfo) {
4033  return new X86FastISel(funcInfo, libInfo);
4034  }
4035 }