LLVM  10.0.0svn
X86FastISel.cpp
1 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the X86-specific support for the FastISel class. Much
10 // of the target-specific code is generated by tablegen in the file
11 // X86GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86CallingConv.h"
17 #include "X86InstrBuilder.h"
18 #include "X86InstrInfo.h"
19 #include "X86MachineFunctionInfo.h"
20 #include "X86RegisterInfo.h"
21 #include "X86Subtarget.h"
22 #include "X86TargetMachine.h"
23 #include "llvm/Analysis/BranchProbabilityInfo.h"
24 #include "llvm/CodeGen/FastISel.h"
25 #include "llvm/CodeGen/FunctionLoweringInfo.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFrameInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/IR/CallSite.h"
30 #include "llvm/IR/CallingConv.h"
31 #include "llvm/IR/DebugInfo.h"
32 #include "llvm/IR/DerivedTypes.h"
33 #include "llvm/IR/GetElementPtrTypeIterator.h"
34 #include "llvm/IR/GlobalAlias.h"
35 #include "llvm/IR/GlobalVariable.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicInst.h"
38 #include "llvm/IR/Operator.h"
39 #include "llvm/MC/MCAsmInfo.h"
40 #include "llvm/MC/MCSymbol.h"
41 #include "llvm/Target/TargetOptions.h"
43 using namespace llvm;
44 
45 namespace {
46 
47 class X86FastISel final : public FastISel {
48  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
49  /// make the right decision when generating code for different targets.
50  const X86Subtarget *Subtarget;
51 
52  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
53  /// floating point ops.
54  /// When SSE is available, use it for f32 operations.
55  /// When SSE2 is available, use it for f64 operations.
56  bool X86ScalarSSEf64;
57  bool X86ScalarSSEf32;
58 
59 public:
60  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
61  const TargetLibraryInfo *libInfo)
62  : FastISel(funcInfo, libInfo) {
63  Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
64  X86ScalarSSEf64 = Subtarget->hasSSE2();
65  X86ScalarSSEf32 = Subtarget->hasSSE1();
66  }
67 
68  bool fastSelectInstruction(const Instruction *I) override;
69 
70  /// The specified machine instr operand is a vreg, and that
71  /// vreg is being provided by the specified load instruction. If possible,
72  /// try to fold the load as an operand to the instruction, returning true on
73  /// success.
74  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
75  const LoadInst *LI) override;
76 
77  bool fastLowerArguments() override;
78  bool fastLowerCall(CallLoweringInfo &CLI) override;
79  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
80 
81 #include "X86GenFastISel.inc"
82 
83 private:
84  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
85  const DebugLoc &DL);
86 
87  bool X86FastEmitLoad(MVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
88  unsigned &ResultReg, unsigned Alignment = 1);
89 
90  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
91  MachineMemOperand *MMO = nullptr, bool Aligned = false);
92  bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
93  X86AddressMode &AM,
94  MachineMemOperand *MMO = nullptr, bool Aligned = false);
95 
96  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
97  unsigned &ResultReg);
98 
99  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
100  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
101 
102  bool X86SelectLoad(const Instruction *I);
103 
104  bool X86SelectStore(const Instruction *I);
105 
106  bool X86SelectRet(const Instruction *I);
107 
108  bool X86SelectCmp(const Instruction *I);
109 
110  bool X86SelectZExt(const Instruction *I);
111 
112  bool X86SelectSExt(const Instruction *I);
113 
114  bool X86SelectBranch(const Instruction *I);
115 
116  bool X86SelectShift(const Instruction *I);
117 
118  bool X86SelectDivRem(const Instruction *I);
119 
120  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
121 
122  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
123 
124  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
125 
126  bool X86SelectSelect(const Instruction *I);
127 
128  bool X86SelectTrunc(const Instruction *I);
129 
130  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
131  const TargetRegisterClass *RC);
132 
133  bool X86SelectFPExt(const Instruction *I);
134  bool X86SelectFPTrunc(const Instruction *I);
135  bool X86SelectSIToFP(const Instruction *I);
136  bool X86SelectUIToFP(const Instruction *I);
137  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);
138 
139  const X86InstrInfo *getInstrInfo() const {
140  return Subtarget->getInstrInfo();
141  }
142  const X86TargetMachine *getTargetMachine() const {
143  return static_cast<const X86TargetMachine *>(&TM);
144  }
145 
146  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
147 
148  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
149  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
150  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
151  unsigned fastMaterializeConstant(const Constant *C) override;
152 
153  unsigned fastMaterializeAlloca(const AllocaInst *C) override;
154 
155  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
156 
157  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
158  /// computed in an SSE register, not on the X87 floating point stack.
159  bool isScalarFPTypeInSSEReg(EVT VT) const {
160  return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
161  (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
162  }
163 
164  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
165 
166  bool IsMemcpySmall(uint64_t Len);
167 
168  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
169  X86AddressMode SrcAM, uint64_t Len);
170 
171  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
172  const Value *Cond);
173 
174  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
175  X86AddressMode &AM);
176 
177  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
178  const TargetRegisterClass *RC, unsigned Op0,
179  bool Op0IsKill, unsigned Op1, bool Op1IsKill,
180  unsigned Op2, bool Op2IsKill, unsigned Op3,
181  bool Op3IsKill);
182 };
183 
184 } // end anonymous namespace.
185 
186 static std::pair<unsigned, bool>
187 getX86SSEConditionCode(CmpInst::Predicate Predicate) {
188  unsigned CC;
189  bool NeedSwap = false;
190 
191  // SSE Condition code mapping:
192  // 0 - EQ
193  // 1 - LT
194  // 2 - LE
195  // 3 - UNORD
196  // 4 - NEQ
197  // 5 - NLT
198  // 6 - NLE
199  // 7 - ORD
200  switch (Predicate) {
201  default: llvm_unreachable("Unexpected predicate");
202  case CmpInst::FCMP_OEQ: CC = 0; break;
203  case CmpInst::FCMP_OGT: NeedSwap = true; LLVM_FALLTHROUGH;
204  case CmpInst::FCMP_OLT: CC = 1; break;
205  case CmpInst::FCMP_OGE: NeedSwap = true; LLVM_FALLTHROUGH;
206  case CmpInst::FCMP_OLE: CC = 2; break;
207  case CmpInst::FCMP_UNO: CC = 3; break;
208  case CmpInst::FCMP_UNE: CC = 4; break;
209  case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH;
210  case CmpInst::FCMP_UGE: CC = 5; break;
211  case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
212  case CmpInst::FCMP_UGT: CC = 6; break;
213  case CmpInst::FCMP_ORD: CC = 7; break;
214  case CmpInst::FCMP_UEQ: CC = 8; break;
215  case CmpInst::FCMP_ONE: CC = 12; break;
216  }
217 
218  return std::make_pair(CC, NeedSwap);
219 }
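// For example, FCMP_OGT has no direct SSE encoding, so the mapping above
// returns CC = 1 (LT) with NeedSwap = true; the caller is then expected to
// swap the operands and emit "a > b" as "b < a".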
220 
221 /// Adds a complex addressing mode to the given machine instr builder.
222 /// Note, this will constrain the index register. If it's not possible to
223 /// constrain the given index register, then a new one will be created. The
224 /// IndexReg field of the addressing mode will be updated to match in this case.
225 const MachineInstrBuilder &
226 X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
227  X86AddressMode &AM) {
228  // First constrain the index register. It needs to be a GR64_NOSP.
229  AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
230  MIB->getNumOperands() +
231  X86::AddrIndexReg);
232  return ::addFullAddress(MIB, AM);
233 }
234 
235 /// Check if it is possible to fold the condition from the XALU intrinsic
236 /// into the user. The condition code will only be updated on success.
237 bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
238  const Value *Cond) {
239  if (!isa<ExtractValueInst>(Cond))
240  return false;
241 
242  const auto *EV = cast<ExtractValueInst>(Cond);
243  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
244  return false;
245 
246  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
247  MVT RetVT;
248  const Function *Callee = II->getCalledFunction();
249  Type *RetTy =
250  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
251  if (!isTypeLegal(RetTy, RetVT))
252  return false;
253 
254  if (RetVT != MVT::i32 && RetVT != MVT::i64)
255  return false;
256 
257  X86::CondCode TmpCC;
258  switch (II->getIntrinsicID()) {
259  default: return false;
260  case Intrinsic::sadd_with_overflow:
261  case Intrinsic::ssub_with_overflow:
262  case Intrinsic::smul_with_overflow:
263  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
264  case Intrinsic::uadd_with_overflow:
265  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
266  }
267 
268  // Check if both instructions are in the same basic block.
269  if (II->getParent() != I->getParent())
270  return false;
271 
272  // Make sure nothing is in the way
273  BasicBlock::const_iterator Start(I);
274  BasicBlock::const_iterator End(II);
275  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
276  // We only expect extractvalue instructions between the intrinsic and the
277  // instruction to be selected.
278  if (!isa<ExtractValueInst>(Itr))
279  return false;
280 
281  // Check that the extractvalue operand comes from the intrinsic.
282  const auto *EVI = cast<ExtractValueInst>(Itr);
283  if (EVI->getAggregateOperand() != II)
284  return false;
285  }
286 
287  CC = TmpCC;
288  return true;
289 }
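// For example, given IR of the form
//   %res  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %obit = extractvalue { i32, i1 } %res, 1
//   br i1 %obit, label %overflow, label %cont
// the branch can consume the overflow flag directly as COND_O instead of
// first materializing %obit in a register, provided the only instructions
// between the intrinsic and its user are extractvalues of the same result.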
290 
291 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
292  EVT evt = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
293  if (evt == MVT::Other || !evt.isSimple())
294  // Unhandled type. Halt "fast" selection and bail.
295  return false;
296 
297  VT = evt.getSimpleVT();
298  // For now, require SSE/SSE2 for performing floating-point operations,
299  // since x87 requires additional work.
300  if (VT == MVT::f64 && !X86ScalarSSEf64)
301  return false;
302  if (VT == MVT::f32 && !X86ScalarSSEf32)
303  return false;
304  // Similarly, no f80 support yet.
305  if (VT == MVT::f80)
306  return false;
307  // We only handle legal types. For example, on x86-32 the instruction
308  // selector contains all of the 64-bit instructions from x86-64,
309  // under the assumption that i64 won't be used if the target doesn't
310  // support it.
311  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
312 }
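// For example, on a 32-bit target isTypeLegal rejects i64 (and f64 when SSE2
// is unavailable), which makes the X86Select* routines below bail out so the
// instruction falls back to SelectionDAG-based selection.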
313 
314 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
315 /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
316 /// Return true and the result register by reference if it is possible.
317 bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM,
318  MachineMemOperand *MMO, unsigned &ResultReg,
319  unsigned Alignment) {
320  bool HasSSE41 = Subtarget->hasSSE41();
321  bool HasAVX = Subtarget->hasAVX();
322  bool HasAVX2 = Subtarget->hasAVX2();
323  bool HasAVX512 = Subtarget->hasAVX512();
324  bool HasVLX = Subtarget->hasVLX();
325  bool IsNonTemporal = MMO && MMO->isNonTemporal();
326 
327  // Treat i1 loads the same as i8 loads. Masking will be done when storing.
328  if (VT == MVT::i1)
329  VT = MVT::i8;
330 
331  // Get opcode and regclass of the output for the given load instruction.
332  unsigned Opc = 0;
333  switch (VT.SimpleTy) {
334  default: return false;
335  case MVT::i8:
336  Opc = X86::MOV8rm;
337  break;
338  case MVT::i16:
339  Opc = X86::MOV16rm;
340  break;
341  case MVT::i32:
342  Opc = X86::MOV32rm;
343  break;
344  case MVT::i64:
345  // Must be in x86-64 mode.
346  Opc = X86::MOV64rm;
347  break;
348  case MVT::f32:
349  if (X86ScalarSSEf32)
350  Opc = HasAVX512 ? X86::VMOVSSZrm_alt :
351  HasAVX ? X86::VMOVSSrm_alt :
352  X86::MOVSSrm_alt;
353  else
354  Opc = X86::LD_Fp32m;
355  break;
356  case MVT::f64:
357  if (X86ScalarSSEf64)
358  Opc = HasAVX512 ? X86::VMOVSDZrm_alt :
359  HasAVX ? X86::VMOVSDrm_alt :
360  X86::MOVSDrm_alt;
361  else
362  Opc = X86::LD_Fp64m;
363  break;
364  case MVT::f80:
365  // No f80 support yet.
366  return false;
367  case MVT::v4f32:
368  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
369  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
370  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
371  else if (Alignment >= 16)
372  Opc = HasVLX ? X86::VMOVAPSZ128rm :
373  HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
374  else
375  Opc = HasVLX ? X86::VMOVUPSZ128rm :
376  HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
377  break;
378  case MVT::v2f64:
379  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
380  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
381  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
382  else if (Alignment >= 16)
383  Opc = HasVLX ? X86::VMOVAPDZ128rm :
384  HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
385  else
386  Opc = HasVLX ? X86::VMOVUPDZ128rm :
387  HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
388  break;
389  case MVT::v4i32:
390  case MVT::v2i64:
391  case MVT::v8i16:
392  case MVT::v16i8:
393  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
394  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
395  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
396  else if (Alignment >= 16)
397  Opc = HasVLX ? X86::VMOVDQA64Z128rm :
398  HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
399  else
400  Opc = HasVLX ? X86::VMOVDQU64Z128rm :
401  HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
402  break;
403  case MVT::v8f32:
404  assert(HasAVX);
405  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
406  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
407  else if (IsNonTemporal && Alignment >= 16)
408  return false; // Force split for X86::VMOVNTDQArm
409  else if (Alignment >= 32)
410  Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
411  else
412  Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
413  break;
414  case MVT::v4f64:
415  assert(HasAVX);
416  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
417  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
418  else if (IsNonTemporal && Alignment >= 16)
419  return false; // Force split for X86::VMOVNTDQArm
420  else if (Alignment >= 32)
421  Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
422  else
423  Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
424  break;
425  case MVT::v8i32:
426  case MVT::v4i64:
427  case MVT::v16i16:
428  case MVT::v32i8:
429  assert(HasAVX);
430  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
431  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
432  else if (IsNonTemporal && Alignment >= 16)
433  return false; // Force split for X86::VMOVNTDQArm
434  else if (Alignment >= 32)
435  Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
436  else
437  Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
438  break;
439  case MVT::v16f32:
440  assert(HasAVX512);
441  if (IsNonTemporal && Alignment >= 64)
442  Opc = X86::VMOVNTDQAZrm;
443  else
444  Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
445  break;
446  case MVT::v8f64:
447  assert(HasAVX512);
448  if (IsNonTemporal && Alignment >= 64)
449  Opc = X86::VMOVNTDQAZrm;
450  else
451  Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
452  break;
453  case MVT::v8i64:
454  case MVT::v16i32:
455  case MVT::v32i16:
456  case MVT::v64i8:
457  assert(HasAVX512);
458  // Note: There are a lot more choices based on type with AVX-512, but
459  // there's really no advantage when the load isn't masked.
460  if (IsNonTemporal && Alignment >= 64)
461  Opc = X86::VMOVNTDQAZrm;
462  else
463  Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
464  break;
465  }
466 
467  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
468 
469  ResultReg = createResultReg(RC);
470  MachineInstrBuilder MIB =
471  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
472  addFullAddress(MIB, AM);
473  if (MMO)
474  MIB->addMemOperand(*FuncInfo.MF, MMO);
475  return true;
476 }
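// For example, a 16-byte-aligned load of a <4 x float> vector selects
// X86::VMOVAPSrm on a target with AVX but no VLX; if the load also carries
// !nontemporal metadata and SSE4.1 is available, the MOVNTDQA form is chosen
// instead (X86::VMOVNTDQArm with AVX).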
477 
478 /// X86FastEmitStore - Emit a machine instruction to store a value Val of
479 /// type VT. The address is either pre-computed, consisting of a base ptr, Ptr
480 /// and a displacement offset, or a GlobalAddress,
481 /// i.e. V. Return true if it is possible.
482 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
483  X86AddressMode &AM,
484  MachineMemOperand *MMO, bool Aligned) {
485  bool HasSSE1 = Subtarget->hasSSE1();
486  bool HasSSE2 = Subtarget->hasSSE2();
487  bool HasSSE4A = Subtarget->hasSSE4A();
488  bool HasAVX = Subtarget->hasAVX();
489  bool HasAVX512 = Subtarget->hasAVX512();
490  bool HasVLX = Subtarget->hasVLX();
491  bool IsNonTemporal = MMO && MMO->isNonTemporal();
492 
493  // Get opcode and regclass of the output for the given store instruction.
494  unsigned Opc = 0;
495  switch (VT.getSimpleVT().SimpleTy) {
496  case MVT::f80: // No f80 support yet.
497  default: return false;
498  case MVT::i1: {
499  // Mask out all but lowest bit.
500  unsigned AndResult = createResultReg(&X86::GR8RegClass);
501  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
502  TII.get(X86::AND8ri), AndResult)
503  .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
504  ValReg = AndResult;
505  LLVM_FALLTHROUGH; // handle i1 as i8.
506  }
507  case MVT::i8: Opc = X86::MOV8mr; break;
508  case MVT::i16: Opc = X86::MOV16mr; break;
509  case MVT::i32:
510  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
511  break;
512  case MVT::i64:
513  // Must be in x86-64 mode.
514  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
515  break;
516  case MVT::f32:
517  if (X86ScalarSSEf32) {
518  if (IsNonTemporal && HasSSE4A)
519  Opc = X86::MOVNTSS;
520  else
521  Opc = HasAVX512 ? X86::VMOVSSZmr :
522  HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
523  } else
524  Opc = X86::ST_Fp32m;
525  break;
526  case MVT::f64:
527  if (X86ScalarSSEf64) {
528  if (IsNonTemporal && HasSSE4A)
529  Opc = X86::MOVNTSD;
530  else
531  Opc = HasAVX512 ? X86::VMOVSDZmr :
532  HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
533  } else
534  Opc = X86::ST_Fp64m;
535  break;
536  case MVT::x86mmx:
537  Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
538  break;
539  case MVT::v4f32:
540  if (Aligned) {
541  if (IsNonTemporal)
542  Opc = HasVLX ? X86::VMOVNTPSZ128mr :
543  HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
544  else
545  Opc = HasVLX ? X86::VMOVAPSZ128mr :
546  HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
547  } else
548  Opc = HasVLX ? X86::VMOVUPSZ128mr :
549  HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
550  break;
551  case MVT::v2f64:
552  if (Aligned) {
553  if (IsNonTemporal)
554  Opc = HasVLX ? X86::VMOVNTPDZ128mr :
555  HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
556  else
557  Opc = HasVLX ? X86::VMOVAPDZ128mr :
558  HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
559  } else
560  Opc = HasVLX ? X86::VMOVUPDZ128mr :
561  HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
562  break;
563  case MVT::v4i32:
564  case MVT::v2i64:
565  case MVT::v8i16:
566  case MVT::v16i8:
567  if (Aligned) {
568  if (IsNonTemporal)
569  Opc = HasVLX ? X86::VMOVNTDQZ128mr :
570  HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
571  else
572  Opc = HasVLX ? X86::VMOVDQA64Z128mr :
573  HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
574  } else
575  Opc = HasVLX ? X86::VMOVDQU64Z128mr :
576  HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
577  break;
578  case MVT::v8f32:
579  assert(HasAVX);
580  if (Aligned) {
581  if (IsNonTemporal)
582  Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
583  else
584  Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
585  } else
586  Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
587  break;
588  case MVT::v4f64:
589  assert(HasAVX);
590  if (Aligned) {
591  if (IsNonTemporal)
592  Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
593  else
594  Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
595  } else
596  Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
597  break;
598  case MVT::v8i32:
599  case MVT::v4i64:
600  case MVT::v16i16:
601  case MVT::v32i8:
602  assert(HasAVX);
603  if (Aligned) {
604  if (IsNonTemporal)
605  Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
606  else
607  Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
608  } else
609  Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
610  break;
611  case MVT::v16f32:
612  assert(HasAVX512);
613  if (Aligned)
614  Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
615  else
616  Opc = X86::VMOVUPSZmr;
617  break;
618  case MVT::v8f64:
619  assert(HasAVX512);
620  if (Aligned) {
621  Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
622  } else
623  Opc = X86::VMOVUPDZmr;
624  break;
625  case MVT::v8i64:
626  case MVT::v16i32:
627  case MVT::v32i16:
628  case MVT::v64i8:
629  assert(HasAVX512);
630  // Note: There are a lot more choices based on type with AVX-512, but
631  // there's really no advantage when the store isn't masked.
632  if (Aligned)
633  Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
634  else
635  Opc = X86::VMOVDQU64Zmr;
636  break;
637  }
638 
639  const MCInstrDesc &Desc = TII.get(Opc);
640  // Some of the instructions in the previous switch use FR128 instead
641  // of FR32 for ValReg. Make sure the register we feed the instruction
642  // matches its register class constraints.
643  // Note: It is fine to do a copy from FR32 to FR128; these are the same
644  // registers behind the scenes, which is actually why this did not trigger
645  // any bugs before.
646  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
647  MachineInstrBuilder MIB =
648  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
649  addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
650  if (MMO)
651  MIB->addMemOperand(*FuncInfo.MF, MMO);
652 
653  return true;
654 }
655 
656 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
657  X86AddressMode &AM,
658  MachineMemOperand *MMO, bool Aligned) {
659  // Handle 'null' like i32/i64 0.
660  if (isa<ConstantPointerNull>(Val))
661  Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
662 
663  // If this is a store of a simple constant, fold the constant into the store.
664  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
665  unsigned Opc = 0;
666  bool Signed = true;
667  switch (VT.getSimpleVT().SimpleTy) {
668  default: break;
669  case MVT::i1:
670  Signed = false;
671  LLVM_FALLTHROUGH; // Handle as i8.
672  case MVT::i8: Opc = X86::MOV8mi; break;
673  case MVT::i16: Opc = X86::MOV16mi; break;
674  case MVT::i32: Opc = X86::MOV32mi; break;
675  case MVT::i64:
676  // Must be a 32-bit sign extended value.
677  if (isInt<32>(CI->getSExtValue()))
678  Opc = X86::MOV64mi32;
679  break;
680  }
681 
682  if (Opc) {
683  MachineInstrBuilder MIB =
684  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
685  addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
686  : CI->getZExtValue());
687  if (MMO)
688  MIB->addMemOperand(*FuncInfo.MF, MMO);
689  return true;
690  }
691  }
692 
693  unsigned ValReg = getRegForValue(Val);
694  if (ValReg == 0)
695  return false;
696 
697  bool ValKill = hasTrivialKill(Val);
698  return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
699 }
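// For example, "store i64 42, i64* %p" is emitted as a single X86::MOV64mi32
// with the immediate folded into the store, because 42 fits in a sign-extended
// 32-bit field; a constant that does not fit is materialized into a register
// first and stored with a register-based MOV64mr.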
700 
701 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
702 /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
703 /// ISD::SIGN_EXTEND).
704 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
705  unsigned Src, EVT SrcVT,
706  unsigned &ResultReg) {
707  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
708  Src, /*TODO: Kill=*/false);
709  if (RR == 0)
710  return false;
711 
712  ResultReg = RR;
713  return true;
714 }
715 
716 bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
717  // Handle constant address.
718  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
719  // Can't handle alternate code models yet.
720  if (TM.getCodeModel() != CodeModel::Small)
721  return false;
722 
723  // Can't handle TLS yet.
724  if (GV->isThreadLocal())
725  return false;
726 
727  // Can't handle !absolute_symbol references yet.
728  if (GV->isAbsoluteSymbolRef())
729  return false;
730 
731  // RIP-relative addresses can't have additional register operands, so if
732  // we've already folded stuff into the addressing mode, just force the
733  // global value into its own register, which we can use as the basereg.
734  if (!Subtarget->isPICStyleRIPRel() ||
735  (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
736  // Okay, we've committed to selecting this global. Set up the address.
737  AM.GV = GV;
738 
739  // Allow the subtarget to classify the global.
740  unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
741 
742  // If this reference is relative to the pic base, set it now.
743  if (isGlobalRelativeToPICBase(GVFlags)) {
744  // FIXME: How do we know Base.Reg is free??
745  AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
746  }
747 
748  // Unless the ABI requires an extra load, return a direct reference to
749  // the global.
750  if (!isGlobalStubReference(GVFlags)) {
751  if (Subtarget->isPICStyleRIPRel()) {
752  // Use rip-relative addressing if we can. Above we verified that the
753  // base and index registers are unused.
754  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
755  AM.Base.Reg = X86::RIP;
756  }
757  AM.GVOpFlags = GVFlags;
758  return true;
759  }
760 
761  // Ok, we need to do a load from a stub. If we've already loaded from
762  // this stub, reuse the loaded pointer, otherwise emit the load now.
763  DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
764  unsigned LoadReg;
765  if (I != LocalValueMap.end() && I->second != 0) {
766  LoadReg = I->second;
767  } else {
768  // Issue load from stub.
769  unsigned Opc = 0;
770  const TargetRegisterClass *RC = nullptr;
771  X86AddressMode StubAM;
772  StubAM.Base.Reg = AM.Base.Reg;
773  StubAM.GV = GV;
774  StubAM.GVOpFlags = GVFlags;
775 
776  // Prepare for inserting code in the local-value area.
777  SavePoint SaveInsertPt = enterLocalValueArea();
778 
779  if (TLI.getPointerTy(DL) == MVT::i64) {
780  Opc = X86::MOV64rm;
781  RC = &X86::GR64RegClass;
782 
783  if (Subtarget->isPICStyleRIPRel())
784  StubAM.Base.Reg = X86::RIP;
785  } else {
786  Opc = X86::MOV32rm;
787  RC = &X86::GR32RegClass;
788  }
789 
790  LoadReg = createResultReg(RC);
791  MachineInstrBuilder LoadMI =
792  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
793  addFullAddress(LoadMI, StubAM);
794 
795  // Ok, back to normal mode.
796  leaveLocalValueArea(SaveInsertPt);
797 
798  // Prevent loading GV stub multiple times in same MBB.
799  LocalValueMap[V] = LoadReg;
800  }
801 
802  // Now construct the final address. Note that the Disp, Scale,
803  // and Index values may already be set here.
804  AM.Base.Reg = LoadReg;
805  AM.GV = nullptr;
806  return true;
807  }
808  }
809 
810  // If all else fails, try to materialize the value in a register.
811  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
812  if (AM.Base.Reg == 0) {
813  AM.Base.Reg = getRegForValue(V);
814  return AM.Base.Reg != 0;
815  }
816  if (AM.IndexReg == 0) {
817  assert(AM.Scale == 1 && "Scale with no index!");
818  AM.IndexReg = getRegForValue(V);
819  return AM.IndexReg != 0;
820  }
821  }
822 
823  return false;
824 }
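// For example, with the small code model on x86-64, a global that needs no
// stub is addressed directly as global(%rip), while a GOT/dllimport-style
// reference first loads the pointer (e.g. a MOV64rm from the stub entry) into
// a register and then uses that register as the base of the final address.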
825 
826 /// X86SelectAddress - Attempt to fill in an address from the given value.
827 ///
828 bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
829  SmallVector<const Value *, 32> GEPs;
830 redo_gep:
831  const User *U = nullptr;
832  unsigned Opcode = Instruction::UserOp1;
833  if (const Instruction *I = dyn_cast<Instruction>(V)) {
834  // Don't walk into other basic blocks; it's possible we haven't
835  // visited them yet, so the instructions may not yet be assigned
836  // virtual registers.
837  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
838  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
839  Opcode = I->getOpcode();
840  U = I;
841  }
842  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
843  Opcode = C->getOpcode();
844  U = C;
845  }
846 
847  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
848  if (Ty->getAddressSpace() > 255)
849  // Fast instruction selection doesn't support the special
850  // address spaces.
851  return false;
852 
853  switch (Opcode) {
854  default: break;
855  case Instruction::BitCast:
856  // Look past bitcasts.
857  return X86SelectAddress(U->getOperand(0), AM);
858 
859  case Instruction::IntToPtr:
860  // Look past no-op inttoptrs.
861  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
862  TLI.getPointerTy(DL))
863  return X86SelectAddress(U->getOperand(0), AM);
864  break;
865 
866  case Instruction::PtrToInt:
867  // Look past no-op ptrtoints.
868  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
869  return X86SelectAddress(U->getOperand(0), AM);
870  break;
871 
872  case Instruction::Alloca: {
873  // Do static allocas.
874  const AllocaInst *A = cast<AllocaInst>(V);
875  DenseMap<const AllocaInst *, int>::iterator SI =
876  FuncInfo.StaticAllocaMap.find(A);
877  if (SI != FuncInfo.StaticAllocaMap.end()) {
878  AM.BaseType = X86AddressMode::FrameIndexBase;
879  AM.Base.FrameIndex = SI->second;
880  return true;
881  }
882  break;
883  }
884 
885  case Instruction::Add: {
886  // Adds of constants are common and easy enough.
887  if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
888  uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
889  // They have to fit in the 32-bit signed displacement field though.
890  if (isInt<32>(Disp)) {
891  AM.Disp = (uint32_t)Disp;
892  return X86SelectAddress(U->getOperand(0), AM);
893  }
894  }
895  break;
896  }
897 
898  case Instruction::GetElementPtr: {
899  X86AddressMode SavedAM = AM;
900 
901  // Pattern-match simple GEPs.
902  uint64_t Disp = (int32_t)AM.Disp;
903  unsigned IndexReg = AM.IndexReg;
904  unsigned Scale = AM.Scale;
905  gep_type_iterator GTI = gep_type_begin(U);
906  // Iterate through the indices, folding what we can. Constants can be
907  // folded, and one dynamic index can be handled, if the scale is supported.
908  for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
909  i != e; ++i, ++GTI) {
910  const Value *Op = *i;
911  if (StructType *STy = GTI.getStructTypeOrNull()) {
912  const StructLayout *SL = DL.getStructLayout(STy);
913  Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
914  continue;
915  }
916 
917  // An array/variable index is always of the form i*S where S is the
918  // constant scale size. See if we can push the scale into immediates.
919  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
920  for (;;) {
921  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
922  // Constant-offset addressing.
923  Disp += CI->getSExtValue() * S;
924  break;
925  }
926  if (canFoldAddIntoGEP(U, Op)) {
927  // A compatible add with a constant operand. Fold the constant.
928  ConstantInt *CI =
929  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
930  Disp += CI->getSExtValue() * S;
931  // Iterate on the other operand.
932  Op = cast<AddOperator>(Op)->getOperand(0);
933  continue;
934  }
935  if (IndexReg == 0 &&
936  (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
937  (S == 1 || S == 2 || S == 4 || S == 8)) {
938  // Scaled-index addressing.
939  Scale = S;
940  IndexReg = getRegForGEPIndex(Op).first;
941  if (IndexReg == 0)
942  return false;
943  break;
944  }
945  // Unsupported.
946  goto unsupported_gep;
947  }
948  }
949 
950  // Check for displacement overflow.
951  if (!isInt<32>(Disp))
952  break;
953 
954  AM.IndexReg = IndexReg;
955  AM.Scale = Scale;
956  AM.Disp = (uint32_t)Disp;
957  GEPs.push_back(V);
958 
959  if (const GetElementPtrInst *GEP =
960  dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
961  // Ok, the GEP indices were covered by constant-offset and scaled-index
962  // addressing. Update the address state and move on to examining the base.
963  V = GEP;
964  goto redo_gep;
965  } else if (X86SelectAddress(U->getOperand(0), AM)) {
966  return true;
967  }
968 
969  // If we couldn't merge the gep value into this addr mode, revert back to
970  // our address and just match the value instead of completely failing.
971  AM = SavedAM;
972 
973  for (const Value *I : reverse(GEPs))
974  if (handleConstantAddresses(I, AM))
975  return true;
976 
977  return false;
978  unsupported_gep:
979  // Ok, the GEP indices weren't all covered.
980  break;
981  }
982  }
983 
984  return handleConstantAddresses(V, AM);
985 }
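// For example, a GEP such as
//   %p = getelementptr inbounds [16 x i32], [16 x i32]* %arr, i64 0, i64 %i
// folds into a single addressing mode with the array as the base, %i in the
// index register, scale 4, and displacement 0; a constant index instead folds
// entirely into the 32-bit displacement field.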
986 
987 /// X86SelectCallAddress - Attempt to fill in an address from the given value.
988 ///
989 bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
990  const User *U = nullptr;
991  unsigned Opcode = Instruction::UserOp1;
992  const Instruction *I = dyn_cast<Instruction>(V);
993  // Record if the value is defined in the same basic block.
994  //
995  // This information is crucial to know whether or not folding an
996  // operand is valid.
997  // Indeed, FastISel generates or reuses a virtual register for all
998  // operands of all instructions it selects. Obviously, the definition and
999  // its uses must use the same virtual register otherwise the produced
1000  // code is incorrect.
1001  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
1002  // registers for values that are alive across basic blocks. This ensures
1003  // that the values are consistently set across basic blocks, even
1004  // if different instruction selection mechanisms are used (e.g., a mix of
1005  // SDISel and FastISel).
1006  // For values local to a basic block, the instruction selection process
1007  // generates these virtual registers with whatever method is appropriate
1008  // for its needs. In particular, FastISel and SDISel do not share the way
1009  // local virtual registers are set.
1010  // Therefore, it is impossible (or at least unsafe) to share values
1011  // between basic blocks unless they use the same instruction selection
1012  // method, which is not guaranteed for X86.
1013  // Moreover, things like hasOneUse could not be used accurately if we
1014  // allowed references to values across basic blocks even though they are
1015  // not alive across basic blocks initially.
1016  bool InMBB = true;
1017  if (I) {
1018  Opcode = I->getOpcode();
1019  U = I;
1020  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
1021  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
1022  Opcode = C->getOpcode();
1023  U = C;
1024  }
1025 
1026  switch (Opcode) {
1027  default: break;
1028  case Instruction::BitCast:
1029  // Look past bitcasts if its operand is in the same BB.
1030  if (InMBB)
1031  return X86SelectCallAddress(U->getOperand(0), AM);
1032  break;
1033 
1034  case Instruction::IntToPtr:
1035  // Look past no-op inttoptrs if its operand is in the same BB.
1036  if (InMBB &&
1037  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
1038  TLI.getPointerTy(DL))
1039  return X86SelectCallAddress(U->getOperand(0), AM);
1040  break;
1041 
1042  case Instruction::PtrToInt:
1043  // Look past no-op ptrtoints if its operand is in the same BB.
1044  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
1045  return X86SelectCallAddress(U->getOperand(0), AM);
1046  break;
1047  }
1048 
1049  // Handle constant address.
1050  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
1051  // Can't handle alternate code models yet.
1052  if (TM.getCodeModel() != CodeModel::Small)
1053  return false;
1054 
1055  // RIP-relative addresses can't have additional register operands.
1056  if (Subtarget->isPICStyleRIPRel() &&
1057  (AM.Base.Reg != 0 || AM.IndexReg != 0))
1058  return false;
1059 
1060  // Can't handle TLS.
1061  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
1062  if (GVar->isThreadLocal())
1063  return false;
1064 
1065  // Okay, we've committed to selecting this global. Set up the basic address.
1066  AM.GV = GV;
1067 
1068  // Return a direct reference to the global. Fastisel can handle calls to
1069  // functions that require loads, such as dllimport and nonlazybind
1070  // functions.
1071  if (Subtarget->isPICStyleRIPRel()) {
1072  // Use rip-relative addressing if we can. Above we verified that the
1073  // base and index registers are unused.
1074  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
1075  AM.Base.Reg = X86::RIP;
1076  } else {
1077  AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
1078  }
1079 
1080  return true;
1081  }
1082 
1083  // If all else fails, try to materialize the value in a register.
1084  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
1085  if (AM.Base.Reg == 0) {
1086  AM.Base.Reg = getRegForValue(V);
1087  return AM.Base.Reg != 0;
1088  }
1089  if (AM.IndexReg == 0) {
1090  assert(AM.Scale == 1 && "Scale with no index!");
1091  AM.IndexReg = getRegForValue(V);
1092  return AM.IndexReg != 0;
1093  }
1094  }
1095 
1096  return false;
1097 }
1098 
1099 
1100 /// X86SelectStore - Select and emit code to implement store instructions.
1101 bool X86FastISel::X86SelectStore(const Instruction *I) {
1102  // Atomic stores need special handling.
1103  const StoreInst *S = cast<StoreInst>(I);
1104 
1105  if (S->isAtomic())
1106  return false;
1107 
1108  const Value *PtrV = I->getOperand(1);
1109  if (TLI.supportSwiftError()) {
1110  // Swifterror values can come from either a function parameter with
1111  // swifterror attribute or an alloca with swifterror attribute.
1112  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
1113  if (Arg->hasSwiftErrorAttr())
1114  return false;
1115  }
1116 
1117  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
1118  if (Alloca->isSwiftError())
1119  return false;
1120  }
1121  }
1122 
1123  const Value *Val = S->getValueOperand();
1124  const Value *Ptr = S->getPointerOperand();
1125 
1126  MVT VT;
1127  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
1128  return false;
1129 
1130  unsigned Alignment = S->getAlignment();
1131  unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
1132  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1133  Alignment = ABIAlignment;
1134  bool Aligned = Alignment >= ABIAlignment;
1135 
1136  X86AddressMode AM;
1137  if (!X86SelectAddress(Ptr, AM))
1138  return false;
1139 
1140  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
1141 }
1142 
1143 /// X86SelectRet - Select and emit code to implement ret instructions.
1144 bool X86FastISel::X86SelectRet(const Instruction *I) {
1145  const ReturnInst *Ret = cast<ReturnInst>(I);
1146  const Function &F = *I->getParent()->getParent();
1147  const X86MachineFunctionInfo *X86MFInfo =
1148  FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
1149 
1150  if (!FuncInfo.CanLowerReturn)
1151  return false;
1152 
1153  if (TLI.supportSwiftError() &&
1154  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
1155  return false;
1156 
1157  if (TLI.supportSplitCSR(FuncInfo.MF))
1158  return false;
1159 
1160  CallingConv::ID CC = F.getCallingConv();
1161  if (CC != CallingConv::C &&
1162  CC != CallingConv::Fast &&
1163  CC != CallingConv::Tail &&
1164  CC != CallingConv::X86_FastCall &&
1165  CC != CallingConv::X86_StdCall &&
1166  CC != CallingConv::X86_ThisCall &&
1167  CC != CallingConv::X86_64_SysV &&
1168  CC != CallingConv::Win64)
1169  return false;
1170 
1171  // Don't handle popping bytes if they don't fit the ret's immediate.
1172  if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
1173  return false;
1174 
1175  // fastcc with -tailcallopt is intended to provide a guaranteed
1176  // tail call optimization. Fastisel doesn't know how to do that.
1177  if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
1178  CC == CallingConv::Tail)
1179  return false;
1180 
1181  // Let SDISel handle vararg functions.
1182  if (F.isVarArg())
1183  return false;
1184 
1185  // Build a list of return value registers.
1186  SmallVector<unsigned, 4> RetRegs;
1187 
1188  if (Ret->getNumOperands() > 0) {
1189  SmallVector<ISD::OutputArg, 4> Outs;
1190  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1191 
1192  // Analyze operands of the call, assigning locations to each operand.
1193  SmallVector<CCValAssign, 16> ValLocs;
1194  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
1195  CCInfo.AnalyzeReturn(Outs, RetCC_X86);
1196 
1197  const Value *RV = Ret->getOperand(0);
1198  unsigned Reg = getRegForValue(RV);
1199  if (Reg == 0)
1200  return false;
1201 
1202  // Only handle a single return value for now.
1203  if (ValLocs.size() != 1)
1204  return false;
1205 
1206  CCValAssign &VA = ValLocs[0];
1207 
1208  // Don't bother handling odd stuff for now.
1209  if (VA.getLocInfo() != CCValAssign::Full)
1210  return false;
1211  // Only handle register returns for now.
1212  if (!VA.isRegLoc())
1213  return false;
1214 
1215  // The calling-convention tables for x87 returns don't tell
1216  // the whole story.
1217  if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
1218  return false;
1219 
1220  unsigned SrcReg = Reg + VA.getValNo();
1221  EVT SrcVT = TLI.getValueType(DL, RV->getType());
1222  EVT DstVT = VA.getValVT();
1223  // Special handling for extended integers.
1224  if (SrcVT != DstVT) {
1225  if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
1226  return false;
1227 
1228  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
1229  return false;
1230 
1231  assert(DstVT == MVT::i32 && "X86 should always ext to i32");
1232 
1233  if (SrcVT == MVT::i1) {
1234  if (Outs[0].Flags.isSExt())
1235  return false;
1236  SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
1237  SrcVT = MVT::i8;
1238  }
1239  unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
1240  ISD::SIGN_EXTEND;
1241  SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
1242  SrcReg, /*TODO: Kill=*/false);
1243  }
1244 
1245  // Make the copy.
1246  Register DstReg = VA.getLocReg();
1247  const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
1248  // Avoid a cross-class copy. This is very unlikely.
1249  if (!SrcRC->contains(DstReg))
1250  return false;
1251  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1252  TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
1253 
1254  // Add register to return instruction.
1255  RetRegs.push_back(VA.getLocReg());
1256  }
1257 
1258  // Swift calling convention does not require we copy the sret argument
1259  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
1260 
1261  // All x86 ABIs require that for returning structs by value we copy
1262  // the sret argument into %rax/%eax (depending on ABI) for the return.
1263  // We saved the argument into a virtual register in the entry block,
1264  // so now we copy the value out and into %rax/%eax.
1265  if (F.hasStructRetAttr() && CC != CallingConv::Swift) {
1266  unsigned Reg = X86MFInfo->getSRetReturnReg();
1267  assert(Reg &&
1268  "SRetReturnReg should have been set in LowerFormalArguments()!");
1269  unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
1270  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1271  TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
1272  RetRegs.push_back(RetReg);
1273  }
1274 
1275  // Now emit the RET.
1276  MachineInstrBuilder MIB;
1277  if (X86MFInfo->getBytesToPopOnReturn()) {
1278  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1279  TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL))
1280  .addImm(X86MFInfo->getBytesToPopOnReturn());
1281  } else {
1282  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1283  TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
1284  }
1285  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1286  MIB.addReg(RetRegs[i], RegState::Implicit);
1287  return true;
1288 }
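// For example, returning an i8 that the ABI marks zeroext is widened to i32
// (typically with a MOVZX) before being copied into the return register, and
// the RET (or the RETI form when callee-popped bytes are pending) lists that
// register as an implicit use so later passes know it is live out.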
1289 
1290 /// X86SelectLoad - Select and emit code to implement load instructions.
1291 ///
1292 bool X86FastISel::X86SelectLoad(const Instruction *I) {
1293  const LoadInst *LI = cast<LoadInst>(I);
1294 
1295  // Atomic loads need special handling.
1296  if (LI->isAtomic())
1297  return false;
1298 
1299  const Value *SV = I->getOperand(0);
1300  if (TLI.supportSwiftError()) {
1301  // Swifterror values can come from either a function parameter with
1302  // swifterror attribute or an alloca with swifterror attribute.
1303  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1304  if (Arg->hasSwiftErrorAttr())
1305  return false;
1306  }
1307 
1308  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1309  if (Alloca->isSwiftError())
1310  return false;
1311  }
1312  }
1313 
1314  MVT VT;
1315  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
1316  return false;
1317 
1318  const Value *Ptr = LI->getPointerOperand();
1319 
1320  X86AddressMode AM;
1321  if (!X86SelectAddress(Ptr, AM))
1322  return false;
1323 
1324  unsigned Alignment = LI->getAlignment();
1325  unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
1326  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1327  Alignment = ABIAlignment;
1328 
1329  unsigned ResultReg = 0;
1330  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
1331  Alignment))
1332  return false;
1333 
1334  updateValueMap(I, ResultReg);
1335  return true;
1336 }
1337 
1338 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
1339  bool HasAVX512 = Subtarget->hasAVX512();
1340  bool HasAVX = Subtarget->hasAVX();
1341  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
1342  bool X86ScalarSSEf64 = Subtarget->hasSSE2();
1343 
1344  switch (VT.getSimpleVT().SimpleTy) {
1345  default: return 0;
1346  case MVT::i8: return X86::CMP8rr;
1347  case MVT::i16: return X86::CMP16rr;
1348  case MVT::i32: return X86::CMP32rr;
1349  case MVT::i64: return X86::CMP64rr;
1350  case MVT::f32:
1351  return X86ScalarSSEf32
1352  ? (HasAVX512 ? X86::VUCOMISSZrr
1353  : HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr)
1354  : 0;
1355  case MVT::f64:
1356  return X86ScalarSSEf64
1357  ? (HasAVX512 ? X86::VUCOMISDZrr
1358  : HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr)
1359  : 0;
1360  }
1361 }
1362 
1363 /// If the RHS of the comparison is a constant that can be folded into the
1364 /// compare, return an opcode that does so (e.g. CMP32ri); otherwise return 0.
1365 static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
1366  int64_t Val = RHSC->getSExtValue();
1367  switch (VT.getSimpleVT().SimpleTy) {
1368  // Otherwise, we can't fold the immediate into this comparison.
1369  default:
1370  return 0;
1371  case MVT::i8:
1372  return X86::CMP8ri;
1373  case MVT::i16:
1374  if (isInt<8>(Val))
1375  return X86::CMP16ri8;
1376  return X86::CMP16ri;
1377  case MVT::i32:
1378  if (isInt<8>(Val))
1379  return X86::CMP32ri8;
1380  return X86::CMP32ri;
1381  case MVT::i64:
1382  if (isInt<8>(Val))
1383  return X86::CMP64ri8;
1384  // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
1385  // field.
1386  if (isInt<32>(Val))
1387  return X86::CMP64ri32;
1388  return 0;
1389  }
1390 }
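// For example, "icmp slt i32 %x, 5" can use X86::CMP32ri8 because 5 fits in a
// sign-extended 8-bit immediate, while comparing an i64 against a constant
// that needs more than 32 bits cannot be folded at all; the constant is then
// materialized into a register and compared with CMP64rr.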
1391 
1392 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
1393  const DebugLoc &CurDbgLoc) {
1394  unsigned Op0Reg = getRegForValue(Op0);
1395  if (Op0Reg == 0) return false;
1396 
1397  // Handle 'null' like i32/i64 0.
1398  if (isa<ConstantPointerNull>(Op1))
1399  Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
1400 
1401  // We have two options: compare with register or immediate. If the RHS of
1402  // the compare is an immediate that we can fold into this compare, use
1403  // CMPri, otherwise use CMPrr.
1404  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1405  if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
1406  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
1407  .addReg(Op0Reg)
1408  .addImm(Op1C->getSExtValue());
1409  return true;
1410  }
1411  }
1412 
1413  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
1414  if (CompareOpc == 0) return false;
1415 
1416  unsigned Op1Reg = getRegForValue(Op1);
1417  if (Op1Reg == 0) return false;
1418  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
1419  .addReg(Op0Reg)
1420  .addReg(Op1Reg);
1421 
1422  return true;
1423 }
1424 
1425 bool X86FastISel::X86SelectCmp(const Instruction *I) {
1426  const CmpInst *CI = cast<CmpInst>(I);
1427 
1428  MVT VT;
1429  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
1430  return false;
1431 
1432  // Try to optimize or fold the cmp.
1433  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1434  unsigned ResultReg = 0;
1435  switch (Predicate) {
1436  default: break;
1437  case CmpInst::FCMP_FALSE: {
1438  ResultReg = createResultReg(&X86::GR32RegClass);
1439  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
1440  ResultReg);
1441  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
1442  X86::sub_8bit);
1443  if (!ResultReg)
1444  return false;
1445  break;
1446  }
1447  case CmpInst::FCMP_TRUE: {
1448  ResultReg = createResultReg(&X86::GR8RegClass);
1449  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
1450  ResultReg).addImm(1);
1451  break;
1452  }
1453  }
1454 
1455  if (ResultReg) {
1456  updateValueMap(I, ResultReg);
1457  return true;
1458  }
1459 
1460  const Value *LHS = CI->getOperand(0);
1461  const Value *RHS = CI->getOperand(1);
1462 
1463  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1464  // We don't have to materialize a zero constant for this case and can just use
1465  // %x again on the RHS.
1466  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1467  const auto *RHSC = dyn_cast<ConstantFP>(RHS);
1468  if (RHSC && RHSC->isNullValue())
1469  RHS = LHS;
1470  }
1471 
1472  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
1473  static const uint16_t SETFOpcTable[2][3] = {
1474  { X86::COND_E, X86::COND_NP, X86::AND8rr },
1475  { X86::COND_NE, X86::COND_P, X86::OR8rr }
1476  };
1477  const uint16_t *SETFOpc = nullptr;
1478  switch (Predicate) {
1479  default: break;
1480  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
1481  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
1482  }
1483 
1484  ResultReg = createResultReg(&X86::GR8RegClass);
1485  if (SETFOpc) {
1486  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1487  return false;
1488 
1489  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
1490  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
1491  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1492  FlagReg1).addImm(SETFOpc[0]);
1493  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1494  FlagReg2).addImm(SETFOpc[1]);
1495  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
1496  ResultReg).addReg(FlagReg1).addReg(FlagReg2);
1497  updateValueMap(I, ResultReg);
1498  return true;
1499  }
1500 
1501  X86::CondCode CC;
1502  bool SwapArgs;
1503  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1504  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1505 
1506  if (SwapArgs)
1507  std::swap(LHS, RHS);
1508 
1509  // Emit a compare of LHS/RHS.
1510  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1511  return false;
1512 
1513  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1514  ResultReg).addImm(CC);
1515  updateValueMap(I, ResultReg);
1516  return true;
1517 }
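// For example, "fcmp oeq float %a, %b" cannot be tested with a single SETcc
// because equal-and-ordered requires both ZF=1 and PF=0, so the code above
// emits a UCOMISS-family compare followed by SETE and SETNP and ANDs the two
// result bytes together.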
1518 
1519 bool X86FastISel::X86SelectZExt(const Instruction *I) {
1520  EVT DstVT = TLI.getValueType(DL, I->getType());
1521  if (!TLI.isTypeLegal(DstVT))
1522  return false;
1523 
1524  unsigned ResultReg = getRegForValue(I->getOperand(0));
1525  if (ResultReg == 0)
1526  return false;
1527 
1528  // Handle zero-extension from i1 to i8, which is common.
1529  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1530  if (SrcVT == MVT::i1) {
1531  // Set the high bits to zero.
1532  ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
1533  SrcVT = MVT::i8;
1534 
1535  if (ResultReg == 0)
1536  return false;
1537  }
1538 
1539  if (DstVT == MVT::i64) {
1540  // Handle extension to 64-bits via sub-register shenanigans.
1541  unsigned MovInst;
1542 
1543  switch (SrcVT.SimpleTy) {
1544  case MVT::i8: MovInst = X86::MOVZX32rr8; break;
1545  case MVT::i16: MovInst = X86::MOVZX32rr16; break;
1546  case MVT::i32: MovInst = X86::MOV32rr; break;
1547  default: llvm_unreachable("Unexpected zext to i64 source type");
1548  }
1549 
1550  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1551  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
1552  .addReg(ResultReg);
1553 
1554  ResultReg = createResultReg(&X86::GR64RegClass);
1555  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
1556  ResultReg)
1557  .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
1558  } else if (DstVT == MVT::i16) {
1559  // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
1560  // extend to 32-bits and then extract down to 16-bits.
1561  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1562  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
1563  Result32).addReg(ResultReg);
1564 
1565  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1566  X86::sub_16bit);
1567  } else if (DstVT != MVT::i8) {
1568  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1569  ResultReg, /*Kill=*/true);
1570  if (ResultReg == 0)
1571  return false;
1572  }
1573 
1574  updateValueMap(I, ResultReg);
1575  return true;
1576 }
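// For example, "zext i1 %b to i64" first clears the upper bits of the i8 that
// holds %b, zero-extends it to 32 bits with MOVZX32rr8, and then uses
// SUBREG_TO_REG to place the 32-bit result in a 64-bit register, relying on
// 32-bit operations implicitly zeroing the upper 32 bits.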
1577 
1578 bool X86FastISel::X86SelectSExt(const Instruction *I) {
1579  EVT DstVT = TLI.getValueType(DL, I->getType());
1580  if (!TLI.isTypeLegal(DstVT))
1581  return false;
1582 
1583  unsigned ResultReg = getRegForValue(I->getOperand(0));
1584  if (ResultReg == 0)
1585  return false;
1586 
1587  // Handle sign-extension from i1 to i8.
1588  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1589  if (SrcVT == MVT::i1) {
1590  // Set the high bits to zero.
1591  unsigned ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg,
1592  /*TODO: Kill=*/false);
1593  if (ZExtReg == 0)
1594  return false;
1595 
1596  // Negate the result to make an 8-bit sign extended value.
1597  ResultReg = createResultReg(&X86::GR8RegClass);
1598  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r),
1599  ResultReg).addReg(ZExtReg);
1600 
1601  SrcVT = MVT::i8;
1602  }
1603 
1604  if (DstVT == MVT::i16) {
1605  // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
1606  // extend to 32-bits and then extract down to 16-bits.
1607  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1608  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
1609  Result32).addReg(ResultReg);
1610 
1611  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1612  X86::sub_16bit);
1613  } else if (DstVT != MVT::i8) {
1614  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
1615  ResultReg, /*Kill=*/true);
1616  if (ResultReg == 0)
1617  return false;
1618  }
1619 
1620  updateValueMap(I, ResultReg);
1621  return true;
1622 }
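// For example, "sext i1 %b to i32" zero-extends %b into an i8 and then negates
// it with NEG8r, turning 1 into 0xFF (all ones) and leaving 0 unchanged,
// before the normal i8 -> i32 sign extension runs.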
1623 
1624 bool X86FastISel::X86SelectBranch(const Instruction *I) {
1625  // Unconditional branches are selected by tablegen-generated code.
1626  // Handle a conditional branch.
1627  const BranchInst *BI = cast<BranchInst>(I);
1628  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1629  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1630 
1631  // Fold the common case of a conditional branch with a comparison
1632  // in the same block (values defined on other blocks may not have
1633  // initialized registers).
1634  X86::CondCode CC;
1635  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1636  if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
1637  EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1638 
1639  // Try to optimize or fold the cmp.
1640  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1641  switch (Predicate) {
1642  default: break;
1643  case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
1644  case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true;
1645  }
1646 
1647  const Value *CmpLHS = CI->getOperand(0);
1648  const Value *CmpRHS = CI->getOperand(1);
1649 
1650  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
1651  // 0.0.
1652  // We don't have to materialize a zero constant for this case and can just
1653  // use %x again on the RHS.
1654  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1655  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1656  if (CmpRHSC && CmpRHSC->isNullValue())
1657  CmpRHS = CmpLHS;
1658  }
1659 
1660  // Try to take advantage of fallthrough opportunities.
1661  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1662  std::swap(TrueMBB, FalseMBB);
1663  Predicate = CmpInst::getInversePredicate(Predicate);
1664  }
1665 
1666  // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
1667  // code check. Instead two branch instructions are required to check all
1668  // the flags. First we change the predicate to a supported condition code,
1669  // which will be the first branch. Later on we will emit the second
1670  // branch.
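  // For example, a branch on "fcmp une" ends up (roughly) as a compare that
  // sets EFLAGS, a JNE to the true block, and the extra JP emitted below to
  // catch the unordered case.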
1671  bool NeedExtraBranch = false;
1672  switch (Predicate) {
1673  default: break;
1674  case CmpInst::FCMP_OEQ:
1675  std::swap(TrueMBB, FalseMBB);
1676  LLVM_FALLTHROUGH;
1677  case CmpInst::FCMP_UNE:
1678  NeedExtraBranch = true;
1679  Predicate = CmpInst::FCMP_ONE;
1680  break;
1681  }
1682 
1683  bool SwapArgs;
1684  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1685  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1686 
1687  if (SwapArgs)
1688  std::swap(CmpLHS, CmpRHS);
1689 
1690  // Emit a compare of the LHS and RHS, setting the flags.
1691  if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
1692  return false;
1693 
1694  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1695  .addMBB(TrueMBB).addImm(CC);
1696 
1697  // X86 requires a second branch to handle UNE (and OEQ, which is mapped
1698  // to UNE above).
1699  if (NeedExtraBranch) {
1700  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1701  .addMBB(TrueMBB).addImm(X86::COND_P);
1702  }
1703 
1704  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1705  return true;
1706  }
1707  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1708  // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1709  // typically happen for _Bool and C++ bools.
1710  MVT SourceVT;
1711  if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1712  isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1713  unsigned TestOpc = 0;
1714  switch (SourceVT.SimpleTy) {
1715  default: break;
1716  case MVT::i8: TestOpc = X86::TEST8ri; break;
1717  case MVT::i16: TestOpc = X86::TEST16ri; break;
1718  case MVT::i32: TestOpc = X86::TEST32ri; break;
1719  case MVT::i64: TestOpc = X86::TEST64ri32; break;
1720  }
1721  if (TestOpc) {
1722  unsigned OpReg = getRegForValue(TI->getOperand(0));
1723  if (OpReg == 0) return false;
1724 
1725  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
1726  .addReg(OpReg).addImm(1);
1727 
1728  unsigned JmpCond = X86::COND_NE;
1729  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1730  std::swap(TrueMBB, FalseMBB);
1731  JmpCond = X86::COND_E;
1732  }
1733 
1734  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1735  .addMBB(TrueMBB).addImm(JmpCond);
1736 
1737  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1738  return true;
1739  }
1740  }
1741  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
1742  // Fake request the condition, otherwise the intrinsic might be completely
1743  // optimized away.
1744  unsigned TmpReg = getRegForValue(BI->getCondition());
1745  if (TmpReg == 0)
1746  return false;
1747 
1748  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1749  .addMBB(TrueMBB).addImm(CC);
1750  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1751  return true;
1752  }
1753 
1754  // Otherwise do a clumsy setcc and re-test it.
1755  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1756  // in an explicit cast, so make sure to handle that correctly.
1757  unsigned OpReg = getRegForValue(BI->getCondition());
1758  if (OpReg == 0) return false;
1759 
1760  // In case OpReg is a K register, COPY to a GPR
1761  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1762  unsigned KOpReg = OpReg;
1763  OpReg = createResultReg(&X86::GR32RegClass);
1764  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1765  TII.get(TargetOpcode::COPY), OpReg)
1766  .addReg(KOpReg);
1767  OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
1768  X86::sub_8bit);
1769  }
1770  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1771  .addReg(OpReg)
1772  .addImm(1);
1773  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1774  .addMBB(TrueMBB).addImm(X86::COND_NE);
1775  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1776  return true;
1777 }
1778 
1779 bool X86FastISel::X86SelectShift(const Instruction *I) {
1780  unsigned CReg = 0, OpReg = 0;
1781  const TargetRegisterClass *RC = nullptr;
1782  if (I->getType()->isIntegerTy(8)) {
1783  CReg = X86::CL;
1784  RC = &X86::GR8RegClass;
1785  switch (I->getOpcode()) {
1786  case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1787  case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1788  case Instruction::Shl: OpReg = X86::SHL8rCL; break;
1789  default: return false;
1790  }
1791  } else if (I->getType()->isIntegerTy(16)) {
1792  CReg = X86::CX;
1793  RC = &X86::GR16RegClass;
1794  switch (I->getOpcode()) {
1795  default: llvm_unreachable("Unexpected shift opcode");
1796  case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1797  case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1798  case Instruction::Shl: OpReg = X86::SHL16rCL; break;
1799  }
1800  } else if (I->getType()->isIntegerTy(32)) {
1801  CReg = X86::ECX;
1802  RC = &X86::GR32RegClass;
1803  switch (I->getOpcode()) {
1804  default: llvm_unreachable("Unexpected shift opcode");
1805  case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1806  case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1807  case Instruction::Shl: OpReg = X86::SHL32rCL; break;
1808  }
1809  } else if (I->getType()->isIntegerTy(64)) {
1810  CReg = X86::RCX;
1811  RC = &X86::GR64RegClass;
1812  switch (I->getOpcode()) {
1813  default: llvm_unreachable("Unexpected shift opcode");
1814  case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1815  case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1816  case Instruction::Shl: OpReg = X86::SHL64rCL; break;
1817  }
1818  } else {
1819  return false;
1820  }
1821 
1822  MVT VT;
1823  if (!isTypeLegal(I->getType(), VT))
1824  return false;
1825 
1826  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1827  if (Op0Reg == 0) return false;
1828 
1829  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1830  if (Op1Reg == 0) return false;
1831  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
1832  CReg).addReg(Op1Reg);
1833 
1834  // The shift instruction uses X86::CL. If we defined a super-register
1835  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
1836  if (CReg != X86::CL)
1837  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1838  TII.get(TargetOpcode::KILL), X86::CL)
1839  .addReg(CReg, RegState::Kill);
1840 
1841  unsigned ResultReg = createResultReg(RC);
1842  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
1843  .addReg(Op0Reg);
1844  updateValueMap(I, ResultReg);
1845  return true;
1846 }
1847 
1848 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1849  const static unsigned NumTypes = 4; // i8, i16, i32, i64
1850  const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1851  const static bool S = true; // IsSigned
1852  const static bool U = false; // !IsSigned
1853  const static unsigned Copy = TargetOpcode::COPY;
1854  // For the X86 DIV/IDIV instruction, in most cases the dividend
1855  // (numerator) must be in a specific register pair highreg:lowreg,
1856  // producing the quotient in lowreg and the remainder in highreg.
1857  // For most data types, to set up the instruction, the dividend is
1858  // copied into lowreg, and lowreg is sign-extended or zero-extended
1859  // into highreg. The exception is i8, where the dividend is defined
1860  // as a single register rather than a register pair, and we
1861  // therefore directly sign-extend or zero-extend the dividend into
1862  // lowreg, instead of copying, and ignore the highreg.
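  // For example, per the table below a 32-bit sdiv becomes roughly:
  //   copy op0 -> EAX ; CDQ ; IDIV op1
  // leaving the quotient in EAX and the remainder in EDX.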
1863  const static struct DivRemEntry {
1864  // The following portion depends only on the data type.
1865  const TargetRegisterClass *RC;
1866  unsigned LowInReg; // low part of the register pair
1867  unsigned HighInReg; // high part of the register pair
1868  // The following portion depends on both the data type and the operation.
1869  struct DivRemResult {
1870  unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1871  unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1872  // highreg, or copying a zero into highreg.
1873  unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1874  // zero/sign-extending into lowreg for i8.
1875  unsigned DivRemResultReg; // Register containing the desired result.
1876  bool IsOpSigned; // Whether to use signed or unsigned form.
1877  } ResultTable[NumOps];
1878  } OpTable[NumTypes] = {
1879  { &X86::GR8RegClass, X86::AX, 0, {
1880  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
1881  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
1882  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
1883  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
1884  }
1885  }, // i8
1886  { &X86::GR16RegClass, X86::AX, X86::DX, {
1887  { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
1888  { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
1889  { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
1890  { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
1891  }
1892  }, // i16
1893  { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1894  { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
1895  { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
1896  { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
1897  { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
1898  }
1899  }, // i32
1900  { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1901  { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
1902  { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
1903  { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
1904  { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
1905  }
1906  }, // i64
1907  };
1908 
1909  MVT VT;
1910  if (!isTypeLegal(I->getType(), VT))
1911  return false;
1912 
1913  unsigned TypeIndex, OpIndex;
1914  switch (VT.SimpleTy) {
1915  default: return false;
1916  case MVT::i8: TypeIndex = 0; break;
1917  case MVT::i16: TypeIndex = 1; break;
1918  case MVT::i32: TypeIndex = 2; break;
1919  case MVT::i64: TypeIndex = 3;
1920  if (!Subtarget->is64Bit())
1921  return false;
1922  break;
1923  }
1924 
1925  switch (I->getOpcode()) {
1926  default: llvm_unreachable("Unexpected div/rem opcode");
1927  case Instruction::SDiv: OpIndex = 0; break;
1928  case Instruction::SRem: OpIndex = 1; break;
1929  case Instruction::UDiv: OpIndex = 2; break;
1930  case Instruction::URem: OpIndex = 3; break;
1931  }
1932 
1933  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1934  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1935  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1936  if (Op0Reg == 0)
1937  return false;
1938  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1939  if (Op1Reg == 0)
1940  return false;
1941 
1942  // Move op0 into low-order input register.
1943  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1944  TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1945  // Zero-extend or sign-extend into high-order input register.
1946  if (OpEntry.OpSignExtend) {
1947  if (OpEntry.IsOpSigned)
1948  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1949  TII.get(OpEntry.OpSignExtend));
1950  else {
1951  unsigned Zero32 = createResultReg(&X86::GR32RegClass);
1952  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1953  TII.get(X86::MOV32r0), Zero32);
1954 
1955  // Copy the zero into the appropriate sub/super/identical physical
1956  // register. Unfortunately the operations needed are not uniform enough
1957  // to fit neatly into the table above.
1958  if (VT == MVT::i16) {
1959  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1960  TII.get(Copy), TypeEntry.HighInReg)
1961  .addReg(Zero32, 0, X86::sub_16bit);
1962  } else if (VT == MVT::i32) {
1963  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1964  TII.get(Copy), TypeEntry.HighInReg)
1965  .addReg(Zero32);
1966  } else if (VT == MVT::i64) {
1967  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1968  TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1969  .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1970  }
1971  }
1972  }
1973  // Generate the DIV/IDIV instruction.
1974  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1975  TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1976  // For i8 remainder, we can't reference ah directly, as we'll end
1977  // up with bogus copies like %r9b = COPY %ah. Reference ax
1978  // instead to prevent ah references in a rex instruction.
1979  //
1980  // The current assumption of the fast register allocator is that isel
1981  // won't generate explicit references to the GR8_NOREX registers. If
1982  // the allocator and/or the backend get enhanced to be more robust in
1983  // that regard, this can be, and should be, removed.
1984  unsigned ResultReg = 0;
1985  if ((I->getOpcode() == Instruction::SRem ||
1986  I->getOpcode() == Instruction::URem) &&
1987  OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
1988  unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
1989  unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
1990  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1991  TII.get(Copy), SourceSuperReg).addReg(X86::AX);
1992 
1993  // Shift AX right by 8 bits instead of using AH.
1994  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
1995  ResultSuperReg).addReg(SourceSuperReg).addImm(8);
1996 
1997  // Now reference the 8-bit subreg of the result.
1998  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
1999  /*Kill=*/true, X86::sub_8bit);
2000  }
2001  // Copy the result out of the physreg if we haven't already.
2002  if (!ResultReg) {
2003  ResultReg = createResultReg(TypeEntry.RC);
2004  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
2005  .addReg(OpEntry.DivRemResultReg);
2006  }
2007  updateValueMap(I, ResultReg);
2008 
2009  return true;
2010 }
2011 
2012 /// Emit a conditional move instruction (if they are supported) to lower
2013 /// the select.
2014 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
2015  // Check if the subtarget supports these instructions.
2016  if (!Subtarget->hasCMov())
2017  return false;
2018 
2019  // FIXME: Add support for i8.
2020  if (RetVT < MVT::i16 || RetVT > MVT::i64)
2021  return false;
2022 
2023  const Value *Cond = I->getOperand(0);
2024  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2025  bool NeedTest = true;
2026  X86::CondCode CC = X86::COND_NE;
2027 
2028  // Optimize conditions coming from a compare if both instructions are in the
2029  // same basic block (values defined in other basic blocks may not have
2030  // initialized registers).
2031  const auto *CI = dyn_cast<CmpInst>(Cond);
2032  if (CI && (CI->getParent() == I->getParent())) {
2033  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2034 
2035  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
2036  static const uint16_t SETFOpcTable[2][3] = {
2037  { X86::COND_NP, X86::COND_E, X86::TEST8rr },
2038  { X86::COND_P, X86::COND_NE, X86::OR8rr }
2039  };
2040  const uint16_t *SETFOpc = nullptr;
2041  switch (Predicate) {
2042  default: break;
2043  case CmpInst::FCMP_OEQ:
2044  SETFOpc = &SETFOpcTable[0][0];
2045  Predicate = CmpInst::ICMP_NE;
2046  break;
2047  case CmpInst::FCMP_UNE:
2048  SETFOpc = &SETFOpcTable[1][0];
2049  Predicate = CmpInst::ICMP_NE;
2050  break;
2051  }
2052 
2053  bool NeedSwap;
2054  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
2055  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
2056 
2057  const Value *CmpLHS = CI->getOperand(0);
2058  const Value *CmpRHS = CI->getOperand(1);
2059  if (NeedSwap)
2060  std::swap(CmpLHS, CmpRHS);
2061 
2062  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2063  // Emit a compare of the LHS and RHS, setting the flags.
2064  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2065  return false;
2066 
2067  if (SETFOpc) {
2068  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
2069  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
2070  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2071  FlagReg1).addImm(SETFOpc[0]);
2072  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2073  FlagReg2).addImm(SETFOpc[1]);
2074  auto const &II = TII.get(SETFOpc[2]);
2075  if (II.getNumDefs()) {
2076  unsigned TmpReg = createResultReg(&X86::GR8RegClass);
2077  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
2078  .addReg(FlagReg2).addReg(FlagReg1);
2079  } else {
2080  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2081  .addReg(FlagReg2).addReg(FlagReg1);
2082  }
2083  }
2084  NeedTest = false;
2085  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
2086  // Fake request the condition, otherwise the intrinsic might be completely
2087  // optimized away.
2088  unsigned TmpReg = getRegForValue(Cond);
2089  if (TmpReg == 0)
2090  return false;
2091 
2092  NeedTest = false;
2093  }
2094 
2095  if (NeedTest) {
2096  // Selects operate on i1; however, CondReg is 8 bits wide and may contain
2097  // garbage. Only the least significant bit is guaranteed to be accurate, so
2098  // reading more than the LSB may yield non-zero values even when the LSB is
2099  // zero. Therefore, we have to truncate CondReg to i1 for the select. This
2100  // is achieved by performing a TEST against 1.
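  // For an i32 select this boils down to roughly "test $1, %cond" feeding the
  // CMOVNE emitted at the end of this function.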
2101  unsigned CondReg = getRegForValue(Cond);
2102  if (CondReg == 0)
2103  return false;
2104  bool CondIsKill = hasTrivialKill(Cond);
2105 
2106  // In case CondReg is a K register, COPY to a GPR
2107  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2108  unsigned KCondReg = CondReg;
2109  CondReg = createResultReg(&X86::GR32RegClass);
2110  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2111  TII.get(TargetOpcode::COPY), CondReg)
2112  .addReg(KCondReg, getKillRegState(CondIsKill));
2113  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2114  X86::sub_8bit);
2115  }
2116  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2117  .addReg(CondReg, getKillRegState(CondIsKill))
2118  .addImm(1);
2119  }
2120 
2121  const Value *LHS = I->getOperand(1);
2122  const Value *RHS = I->getOperand(2);
2123 
2124  unsigned RHSReg = getRegForValue(RHS);
2125  bool RHSIsKill = hasTrivialKill(RHS);
2126 
2127  unsigned LHSReg = getRegForValue(LHS);
2128  bool LHSIsKill = hasTrivialKill(LHS);
2129 
2130  if (!LHSReg || !RHSReg)
2131  return false;
2132 
2133  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
2134  unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(*RC)/8);
2135  unsigned ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill,
2136  LHSReg, LHSIsKill, CC);
2137  updateValueMap(I, ResultReg);
2138  return true;
2139 }
2140 
2141 /// Emit SSE or AVX instructions to lower the select.
2142 ///
2143 /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
2144 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
2145 /// SSE instructions are available. If AVX is available, try to use a VBLENDV.
2146 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
2147  // Optimize conditions coming from a compare if both instructions are in the
2148  // same basic block (values defined in other basic blocks may not have
2149  // initialized registers).
2150  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
2151  if (!CI || (CI->getParent() != I->getParent()))
2152  return false;
2153 
2154  if (I->getType() != CI->getOperand(0)->getType() ||
2155  !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
2156  (Subtarget->hasSSE2() && RetVT == MVT::f64)))
2157  return false;
2158 
2159  const Value *CmpLHS = CI->getOperand(0);
2160  const Value *CmpRHS = CI->getOperand(1);
2161  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2162 
2163  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
2164  // We don't have to materialize a zero constant for this case and can just use
2165  // %x again on the RHS.
2166  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
2167  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
2168  if (CmpRHSC && CmpRHSC->isNullValue())
2169  CmpRHS = CmpLHS;
2170  }
2171 
2172  unsigned CC;
2173  bool NeedSwap;
2174  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
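  // Predicates 0-7 map onto the original SSE CMPSS/CMPSD immediates; the
  // extended predicates require the AVX VCMPSS/VCMPSD encodings, hence the
  // bail-out below on pre-AVX targets.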
2175  if (CC > 7 && !Subtarget->hasAVX())
2176  return false;
2177 
2178  if (NeedSwap)
2179  std::swap(CmpLHS, CmpRHS);
2180 
2181  const Value *LHS = I->getOperand(1);
2182  const Value *RHS = I->getOperand(2);
2183 
2184  unsigned LHSReg = getRegForValue(LHS);
2185  bool LHSIsKill = hasTrivialKill(LHS);
2186 
2187  unsigned RHSReg = getRegForValue(RHS);
2188  bool RHSIsKill = hasTrivialKill(RHS);
2189 
2190  unsigned CmpLHSReg = getRegForValue(CmpLHS);
2191  bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
2192 
2193  unsigned CmpRHSReg = getRegForValue(CmpRHS);
2194  bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
2195 
2196  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
2197  return false;
2198 
2199  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2200  unsigned ResultReg;
2201 
2202  if (Subtarget->hasAVX512()) {
2203  // If we have AVX512 we can use a mask compare and masked movss/sd.
2204  const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
2205  const TargetRegisterClass *VK1 = &X86::VK1RegClass;
2206 
2207  unsigned CmpOpcode =
2208  (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
2209  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
2210  CmpRHSReg, CmpRHSIsKill, CC);
2211 
2212  // Need an IMPLICIT_DEF for the input that is used to generate the upper
2213  // bits of the result register since it's not based on any of the inputs.
2214  unsigned ImplicitDefReg = createResultReg(VR128X);
2215  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2216  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2217 
2218  // Place RHSReg in the passthru operand of the masked movss/sd and put
2219  // LHSReg in the data input. The mask input comes from the compare.
2220  unsigned MovOpcode =
2221  (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
2222  unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
2223  CmpReg, true, ImplicitDefReg, true,
2224  LHSReg, LHSIsKill);
2225 
2226  ResultReg = createResultReg(RC);
2227  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2228  TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
2229 
2230  } else if (Subtarget->hasAVX()) {
2231  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2232 
2233  // If we have AVX, create 1 blendv instead of 3 logic instructions.
2234  // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
2235  // uses XMM0 as the selection register. That may need just as many
2236  // instructions as the AND/ANDN/OR sequence due to register moves, so
2237  // don't bother.
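  // The emitted sequence is a VCMPSS/VCMPSD producing an all-ones or all-zeros
  // mask, followed by a VBLENDVPS/VBLENDVPD that selects LHS where the mask is
  // set and RHS elsewhere.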
2238  unsigned CmpOpcode =
2239  (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
2240  unsigned BlendOpcode =
2241  (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
2242 
2243  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
2244  CmpRHSReg, CmpRHSIsKill, CC);
2245  unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
2246  LHSReg, LHSIsKill, CmpReg, true);
2247  ResultReg = createResultReg(RC);
2248  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2249  TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
2250  } else {
2251  // Choose the SSE instruction sequence based on data type (float or double).
2252  static const uint16_t OpcTable[2][4] = {
2253  { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
2254  { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
2255  };
2256 
2257  const uint16_t *Opc = nullptr;
2258  switch (RetVT.SimpleTy) {
2259  default: return false;
2260  case MVT::f32: Opc = &OpcTable[0][0]; break;
2261  case MVT::f64: Opc = &OpcTable[1][0]; break;
2262  }
2263 
2264  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2265  unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
2266  CmpRHSReg, CmpRHSIsKill, CC);
2267  unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
2268  LHSReg, LHSIsKill);
2269  unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
2270  RHSReg, RHSIsKill);
2271  unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
2272  AndReg, /*IsKill=*/true);
2273  ResultReg = createResultReg(RC);
2274  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2275  TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
2276  }
2277  updateValueMap(I, ResultReg);
2278  return true;
2279 }
2280 
2281 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
2282  // These are pseudo CMOV instructions and will be later expanded into control-
2283  // flow.
2284  unsigned Opc;
2285  switch (RetVT.SimpleTy) {
2286  default: return false;
2287  case MVT::i8: Opc = X86::CMOV_GR8; break;
2288  case MVT::i16: Opc = X86::CMOV_GR16; break;
2289  case MVT::i32: Opc = X86::CMOV_GR32; break;
2290  case MVT::f32: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X
2291  : X86::CMOV_FR32; break;
2292  case MVT::f64: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X
2293  : X86::CMOV_FR64; break;
2294  }
2295 
2296  const Value *Cond = I->getOperand(0);
2297  X86::CondCode CC = X86::COND_NE;
2298 
2299  // Optimize conditions coming from a compare if both instructions are in the
2300  // same basic block (values defined in other basic blocks may not have
2301  // initialized registers).
2302  const auto *CI = dyn_cast<CmpInst>(Cond);
2303  if (CI && (CI->getParent() == I->getParent())) {
2304  bool NeedSwap;
2305  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
2306  if (CC > X86::LAST_VALID_COND)
2307  return false;
2308 
2309  const Value *CmpLHS = CI->getOperand(0);
2310  const Value *CmpRHS = CI->getOperand(1);
2311 
2312  if (NeedSwap)
2313  std::swap(CmpLHS, CmpRHS);
2314 
2315  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2316  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2317  return false;
2318  } else {
2319  unsigned CondReg = getRegForValue(Cond);
2320  if (CondReg == 0)
2321  return false;
2322  bool CondIsKill = hasTrivialKill(Cond);
2323 
2324  // In case CondReg is a K register, COPY to a GPR
2325  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2326  unsigned KCondReg = CondReg;
2327  CondReg = createResultReg(&X86::GR32RegClass);
2328  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2329  TII.get(TargetOpcode::COPY), CondReg)
2330  .addReg(KCondReg, getKillRegState(CondIsKill));
2331  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2332  X86::sub_8bit);
2333  }
2334  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2335  .addReg(CondReg, getKillRegState(CondIsKill))
2336  .addImm(1);
2337  }
2338 
2339  const Value *LHS = I->getOperand(1);
2340  const Value *RHS = I->getOperand(2);
2341 
2342  unsigned LHSReg = getRegForValue(LHS);
2343  bool LHSIsKill = hasTrivialKill(LHS);
2344 
2345  unsigned RHSReg = getRegForValue(RHS);
2346  bool RHSIsKill = hasTrivialKill(RHS);
2347 
2348  if (!LHSReg || !RHSReg)
2349  return false;
2350 
2351  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2352 
2353  unsigned ResultReg =
2354  fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
2355  updateValueMap(I, ResultReg);
2356  return true;
2357 }
2358 
2359 bool X86FastISel::X86SelectSelect(const Instruction *I) {
2360  MVT RetVT;
2361  if (!isTypeLegal(I->getType(), RetVT))
2362  return false;
2363 
2364  // Check if we can fold the select.
2365  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2366  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2367  const Value *Opnd = nullptr;
2368  switch (Predicate) {
2369  default: break;
2370  case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
2371  case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
2372  }
2373  // No need for a select anymore - this is an unconditional move.
2374  if (Opnd) {
2375  unsigned OpReg = getRegForValue(Opnd);
2376  if (OpReg == 0)
2377  return false;
2378  bool OpIsKill = hasTrivialKill(Opnd);
2379  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2380  unsigned ResultReg = createResultReg(RC);
2381  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2382  TII.get(TargetOpcode::COPY), ResultReg)
2383  .addReg(OpReg, getKillRegState(OpIsKill));
2384  updateValueMap(I, ResultReg);
2385  return true;
2386  }
2387  }
2388 
2389  // First try to use real conditional move instructions.
2390  if (X86FastEmitCMoveSelect(RetVT, I))
2391  return true;
2392 
2393  // Try to use a sequence of SSE instructions to simulate a conditional move.
2394  if (X86FastEmitSSESelect(RetVT, I))
2395  return true;
2396 
2397  // Fall-back to pseudo conditional move instructions, which will be later
2398  // converted to control-flow.
2399  if (X86FastEmitPseudoSelect(RetVT, I))
2400  return true;
2401 
2402  return false;
2403 }
2404 
2405 // Common code for X86SelectSIToFP and X86SelectUIToFP.
2406 bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
2407  // The target-independent selection algorithm in FastISel already knows how
2408  // to select a SINT_TO_FP if the target is SSE but not AVX.
2409  // Early exit if the subtarget doesn't have AVX.
2410  // Unsigned conversion requires avx512.
2411  bool HasAVX512 = Subtarget->hasAVX512();
2412  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
2413  return false;
2414 
2415  // TODO: We could sign extend narrower types.
2416  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
2417  if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
2418  return false;
2419 
2420  // Select integer to float/double conversion.
2421  unsigned OpReg = getRegForValue(I->getOperand(0));
2422  if (OpReg == 0)
2423  return false;
2424 
2425  unsigned Opcode;
2426 
2427  static const uint16_t SCvtOpc[2][2][2] = {
2428  { { X86::VCVTSI2SSrr, X86::VCVTSI642SSrr },
2429  { X86::VCVTSI2SDrr, X86::VCVTSI642SDrr } },
2430  { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
2431  { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
2432  };
2433  static const uint16_t UCvtOpc[2][2] = {
2434  { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
2435  { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
2436  };
2437  bool Is64Bit = SrcVT == MVT::i64;
2438 
2439  if (I->getType()->isDoubleTy()) {
2440  // s/uitofp int -> double
2441  Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
2442  } else if (I->getType()->isFloatTy()) {
2443  // s/uitofp int -> float
2444  Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
2445  } else
2446  return false;
2447 
2448  MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
2449  const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
2450  unsigned ImplicitDefReg = createResultReg(RC);
2451  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2452  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2453  unsigned ResultReg =
2454  fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
2455  updateValueMap(I, ResultReg);
2456  return true;
2457 }
2458 
2459 bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
2460  return X86SelectIntToFP(I, /*IsSigned*/true);
2461 }
2462 
2463 bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
2464  return X86SelectIntToFP(I, /*IsSigned*/false);
2465 }
2466 
2467 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
2468 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
2469  unsigned TargetOpc,
2470  const TargetRegisterClass *RC) {
2471  assert((I->getOpcode() == Instruction::FPExt ||
2472  I->getOpcode() == Instruction::FPTrunc) &&
2473  "Instruction must be an FPExt or FPTrunc!");
2474  bool HasAVX = Subtarget->hasAVX();
2475 
2476  unsigned OpReg = getRegForValue(I->getOperand(0));
2477  if (OpReg == 0)
2478  return false;
2479 
2480  unsigned ImplicitDefReg;
2481  if (HasAVX) {
2482  ImplicitDefReg = createResultReg(RC);
2483  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2484  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2485 
2486  }
2487 
2488  unsigned ResultReg = createResultReg(RC);
2489  MachineInstrBuilder MIB;
2490  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
2491  ResultReg);
2492 
2493  if (HasAVX)
2494  MIB.addReg(ImplicitDefReg);
2495 
2496  MIB.addReg(OpReg);
2497  updateValueMap(I, ResultReg);
2498  return true;
2499 }
2500 
2501 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2502  if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
2503  I->getOperand(0)->getType()->isFloatTy()) {
2504  bool HasAVX512 = Subtarget->hasAVX512();
2505  // fpext from float to double.
2506  unsigned Opc =
2507  HasAVX512 ? X86::VCVTSS2SDZrr
2508  : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
2509  return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f64));
2510  }
2511 
2512  return false;
2513 }
2514 
2515 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2516  if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
2517  I->getOperand(0)->getType()->isDoubleTy()) {
2518  bool HasAVX512 = Subtarget->hasAVX512();
2519  // fptrunc from double to float.
2520  unsigned Opc =
2521  HasAVX512 ? X86::VCVTSD2SSZrr
2522  : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
2523  return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f32));
2524  }
2525 
2526  return false;
2527 }
2528 
2529 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2530  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
2531  EVT DstVT = TLI.getValueType(DL, I->getType());
2532 
2533  // This code only handles truncation to byte.
2534  if (DstVT != MVT::i8 && DstVT != MVT::i1)
2535  return false;
2536  if (!TLI.isTypeLegal(SrcVT))
2537  return false;
2538 
2539  unsigned InputReg = getRegForValue(I->getOperand(0));
2540  if (!InputReg)
2541  // Unhandled operand. Halt "fast" selection and bail.
2542  return false;
2543 
2544  if (SrcVT == MVT::i8) {
2545  // Truncate from i8 to i1; no code needed.
2546  updateValueMap(I, InputReg);
2547  return true;
2548  }
2549 
2550  // Issue an extract_subreg.
2551  unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
2552  InputReg, false,
2553  X86::sub_8bit);
2554  if (!ResultReg)
2555  return false;
2556 
2557  updateValueMap(I, ResultReg);
2558  return true;
2559 }
2560 
2561 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2562  return Len <= (Subtarget->is64Bit() ? 32 : 16);
2563 }
2564 
2565 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
2566  X86AddressMode SrcAM, uint64_t Len) {
2567 
2568  // Make sure we don't bloat code by inlining very large memcpy's.
2569  if (!IsMemcpySmall(Len))
2570  return false;
2571 
2572  bool i64Legal = Subtarget->is64Bit();
2573 
2574  // We don't care about alignment here since we just emit integer accesses.
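  // For example, an 11-byte copy on x86-64 is emitted as an i64 load/store
  // pair followed by an i16 and then an i8 pair.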
2575  while (Len) {
2576  MVT VT;
2577  if (Len >= 8 && i64Legal)
2578  VT = MVT::i64;
2579  else if (Len >= 4)
2580  VT = MVT::i32;
2581  else if (Len >= 2)
2582  VT = MVT::i16;
2583  else
2584  VT = MVT::i8;
2585 
2586  unsigned Reg;
2587  bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2588  RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
2589  assert(RV && "Failed to emit load or store??");
2590 
2591  unsigned Size = VT.getSizeInBits()/8;
2592  Len -= Size;
2593  DestAM.Disp += Size;
2594  SrcAM.Disp += Size;
2595  }
2596 
2597  return true;
2598 }
2599 
2600 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2601  // FIXME: Handle more intrinsics.
2602  switch (II->getIntrinsicID()) {
2603  default: return false;
2604  case Intrinsic::convert_from_fp16:
2605  case Intrinsic::convert_to_fp16: {
2606  if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
2607  return false;
2608 
2609  const Value *Op = II->getArgOperand(0);
2610  unsigned InputReg = getRegForValue(Op);
2611  if (InputReg == 0)
2612  return false;
2613 
2614  // F16C only allows converting from float to half and from half to float.
2615  bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
2616  if (IsFloatToHalf) {
2617  if (!Op->getType()->isFloatTy())
2618  return false;
2619  } else {
2620  if (!II->getType()->isFloatTy())
2621  return false;
2622  }
2623 
2624  unsigned ResultReg = 0;
2625  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
2626  if (IsFloatToHalf) {
2627  // 'InputReg' is implicitly promoted from register class FR32 to
2628  // register class VR128 by method 'constrainOperandRegClass' which is
2629  // directly called by 'fastEmitInst_ri'.
2630  // Instruction VCVTPS2PHrr takes an extra immediate operand which is
2631  // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
2632  // It's consistent with the other FP instructions, which are usually
2633  // controlled by MXCSR.
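  // The float-to-half path below therefore emits roughly:
  //   vcvtps2ph $4, %src, %tmp ; vmovd %tmp, %r32 ; extract the low 16 bits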
2634  InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4);
2635 
2636  // Move the lower 32-bits of ResultReg to another register of class GR32.
2637  ResultReg = createResultReg(&X86::GR32RegClass);
2638  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2639  TII.get(X86::VMOVPDI2DIrr), ResultReg)
2640  .addReg(InputReg, RegState::Kill);
2641 
2642  // The result value is in the lower 16-bits of ResultReg.
2643  unsigned RegIdx = X86::sub_16bit;
2644  ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
2645  } else {
2646  assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
2647  // Explicitly sign-extend the input to 32-bit.
2648  InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
2649  /*Kill=*/false);
2650 
2651  // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
2652  InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
2653  InputReg, /*Kill=*/true);
2654 
2655  InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
2656 
2657  // The result value is in the lower 32-bits of ResultReg.
2658  // Emit an explicit copy from register class VR128 to register class FR32.
2659  ResultReg = createResultReg(&X86::FR32RegClass);
2660  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2661  TII.get(TargetOpcode::COPY), ResultReg)
2662  .addReg(InputReg, RegState::Kill);
2663  }
2664 
2665  updateValueMap(II, ResultReg);
2666  return true;
2667  }
2668  case Intrinsic::frameaddress: {
2669  MachineFunction *MF = FuncInfo.MF;
2670  if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
2671  return false;
2672 
2673  Type *RetTy = II->getCalledFunction()->getReturnType();
2674 
2675  MVT VT;
2676  if (!isTypeLegal(RetTy, VT))
2677  return false;
2678 
2679  unsigned Opc;
2680  const TargetRegisterClass *RC = nullptr;
2681 
2682  switch (VT.SimpleTy) {
2683  default: llvm_unreachable("Invalid result type for frameaddress.");
2684  case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2685  case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2686  }
2687 
2688  // This needs to be set before we call getPtrSizedFrameRegister, otherwise
2689  // we get the wrong frame register.
2690  MachineFrameInfo &MFI = MF->getFrameInfo();
2691  MFI.setFrameAddressIsTaken(true);
2692 
2693  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2694  unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
2695  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
2696  (FrameReg == X86::EBP && VT == MVT::i32)) &&
2697  "Invalid Frame Register!");
2698 
2699  // Always make a copy of the frame register to a vreg first, so that we
2700  // never directly reference the frame register (the TwoAddressInstruction-
2701  // Pass doesn't like that).
2702  unsigned SrcReg = createResultReg(RC);
2703  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2704  TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2705 
2706  // Now recursively load from the frame address.
2707  // movq (%rbp), %rax
2708  // movq (%rax), %rax
2709  // movq (%rax), %rax
2710  // ...
2711  unsigned DestReg;
2712  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2713  while (Depth--) {
2714  DestReg = createResultReg(RC);
2715  addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2716  TII.get(Opc), DestReg), SrcReg);
2717  SrcReg = DestReg;
2718  }
2719 
2720  updateValueMap(II, SrcReg);
2721  return true;
2722  }
2723  case Intrinsic::memcpy: {
2724  const MemCpyInst *MCI = cast<MemCpyInst>(II);
2725  // Don't handle volatile or variable length memcpys.
2726  if (MCI->isVolatile())
2727  return false;
2728 
2729  if (isa<ConstantInt>(MCI->getLength())) {
2730  // Small memcpy's are common enough that we want to do them
2731  // without a call if possible.
2732  uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
2733  if (IsMemcpySmall(Len)) {
2734  X86AddressMode DestAM, SrcAM;
2735  if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
2736  !X86SelectAddress(MCI->getRawSource(), SrcAM))
2737  return false;
2738  TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2739  return true;
2740  }
2741  }
2742 
2743  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2744  if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
2745  return false;
2746 
2747  if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
2748  return false;
2749 
2750  return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1);
2751  }
2752  case Intrinsic::memset: {
2753  const MemSetInst *MSI = cast<MemSetInst>(II);
2754 
2755  if (MSI->isVolatile())
2756  return false;
2757 
2758  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2759  if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
2760  return false;
2761 
2762  if (MSI->getDestAddressSpace() > 255)
2763  return false;
2764 
2765  return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
2766  }
2767  case Intrinsic::stackprotector: {
2768  // Emit code to store the stack guard onto the stack.
2769  EVT PtrTy = TLI.getPointerTy(DL);
2770 
2771  const Value *Op1 = II->getArgOperand(0); // The guard's value.
2772  const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
2773 
2774  MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
2775 
2776  // Grab the frame index.
2777  X86AddressMode AM;
2778  if (!X86SelectAddress(Slot, AM)) return false;
2779  if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2780  return true;
2781  }
2782  case Intrinsic::dbg_declare: {
2783  const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
2784  X86AddressMode AM;
2785  assert(DI->getAddress() && "Null address should be checked earlier!");
2786  if (!X86SelectAddress(DI->getAddress(), AM))
2787  return false;
2788  const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
2789  // FIXME may need to add RegState::Debug to any registers produced,
2790  // although ESP/EBP should be the only ones at the moment.
2791  assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
2792  "Expected inlined-at fields to agree");
2793  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
2794  .addImm(0)
2795  .addMetadata(DI->getVariable())
2796  .addMetadata(DI->getExpression());
2797  return true;
2798  }
2799  case Intrinsic::trap: {
2800  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
2801  return true;
2802  }
2803  case Intrinsic::sqrt: {
2804  if (!Subtarget->hasSSE1())
2805  return false;
2806 
2807  Type *RetTy = II->getCalledFunction()->getReturnType();
2808 
2809  MVT VT;
2810  if (!isTypeLegal(RetTy, VT))
2811  return false;
2812 
2813  // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
2814  // is not generated by FastISel yet.
2815  // FIXME: Update this code once tablegen can handle it.
2816  static const uint16_t SqrtOpc[3][2] = {
2817  { X86::SQRTSSr, X86::SQRTSDr },
2818  { X86::VSQRTSSr, X86::VSQRTSDr },
2819  { X86::VSQRTSSZr, X86::VSQRTSDZr },
2820  };
2821  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
2822  Subtarget->hasAVX() ? 1 :
2823  0;
2824  unsigned Opc;
2825  switch (VT.SimpleTy) {
2826  default: return false;
2827  case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
2828  case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
2829  }
2830 
2831  const Value *SrcVal = II->getArgOperand(0);
2832  unsigned SrcReg = getRegForValue(SrcVal);
2833 
2834  if (SrcReg == 0)
2835  return false;
2836 
2837  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
2838  unsigned ImplicitDefReg = 0;
2839  if (AVXLevel > 0) {
2840  ImplicitDefReg = createResultReg(RC);
2841  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2842  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2843  }
2844 
2845  unsigned ResultReg = createResultReg(RC);
2846  MachineInstrBuilder MIB;
2847  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2848  ResultReg);
2849 
2850  if (ImplicitDefReg)
2851  MIB.addReg(ImplicitDefReg);
2852 
2853  MIB.addReg(SrcReg);
2854 
2855  updateValueMap(II, ResultReg);
2856  return true;
2857  }
2858  case Intrinsic::sadd_with_overflow:
2859  case Intrinsic::uadd_with_overflow:
2860  case Intrinsic::ssub_with_overflow:
2861  case Intrinsic::usub_with_overflow:
2862  case Intrinsic::smul_with_overflow:
2863  case Intrinsic::umul_with_overflow: {
2864  // This implements the basic lowering of the xalu with overflow intrinsics
2865  // into add/sub/mul followed by either seto or setb.
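  // e.g. llvm.sadd.with.overflow.i32 becomes an ADD32rr whose flags feed a
  // SETO into the second result register; the unsigned variants use SETB
  // (carry) instead.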
2866  const Function *Callee = II->getCalledFunction();
2867  auto *Ty = cast<StructType>(Callee->getReturnType());
2868  Type *RetTy = Ty->getTypeAtIndex(0U);
2869  assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
2870  Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
2871  "Overflow value expected to be an i1");
2872 
2873  MVT VT;
2874  if (!isTypeLegal(RetTy, VT))
2875  return false;
2876 
2877  if (VT < MVT::i8 || VT > MVT::i64)
2878  return false;
2879 
2880  const Value *LHS = II->getArgOperand(0);
2881  const Value *RHS = II->getArgOperand(1);
2882 
2883  // Canonicalize immediate to the RHS.
2884  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2885  isCommutativeIntrinsic(II))
2886  std::swap(LHS, RHS);
2887 
2888  unsigned BaseOpc, CondCode;
2889  switch (II->getIntrinsicID()) {
2890  default: llvm_unreachable("Unexpected intrinsic!");
2891  case Intrinsic::sadd_with_overflow:
2892  BaseOpc = ISD::ADD; CondCode = X86::COND_O; break;
2893  case Intrinsic::uadd_with_overflow:
2894  BaseOpc = ISD::ADD; CondCode = X86::COND_B; break;
2895  case Intrinsic::ssub_with_overflow:
2896  BaseOpc = ISD::SUB; CondCode = X86::COND_O; break;
2897  case Intrinsic::usub_with_overflow:
2898  BaseOpc = ISD::SUB; CondCode = X86::COND_B; break;
2899  case Intrinsic::smul_with_overflow:
2900  BaseOpc = X86ISD::SMUL; CondCode = X86::COND_O; break;
2901  case Intrinsic::umul_with_overflow:
2902  BaseOpc = X86ISD::UMUL; CondCode = X86::COND_O; break;
2903  }
2904 
2905  unsigned LHSReg = getRegForValue(LHS);
2906  if (LHSReg == 0)
2907  return false;
2908  bool LHSIsKill = hasTrivialKill(LHS);
2909 
2910  unsigned ResultReg = 0;
2911  // Check if we have an immediate version.
2912  if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
2913  static const uint16_t Opc[2][4] = {
2914  { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
2915  { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
2916  };
2917 
2918  if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
2919  CondCode == X86::COND_O) {
2920  // We can use INC/DEC.
2921  ResultReg = createResultReg(TLI.getRegClassFor(VT));
2922  bool IsDec = BaseOpc == ISD::SUB;
2923  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2924  TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
2925  .addReg(LHSReg, getKillRegState(LHSIsKill));
2926  } else
2927  ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
2928  CI->getZExtValue());
2929  }
2930 
2931  unsigned RHSReg;
2932  bool RHSIsKill;
2933  if (!ResultReg) {
2934  RHSReg = getRegForValue(RHS);
2935  if (RHSReg == 0)
2936  return false;
2937  RHSIsKill = hasTrivialKill(RHS);
2938  ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
2939  RHSIsKill);
2940  }
2941 
2942  // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
2943  // it manually.
2944  if (BaseOpc == X86ISD::UMUL && !ResultReg) {
2945  static const uint16_t MULOpc[] =
2946  { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2947  static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
2948  // First copy the first operand into RAX, which is an implicit input to
2949  // the X86::MUL*r instruction.
2950  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2951  TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2952  .addReg(LHSReg, getKillRegState(LHSIsKill));
2953  ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2954  TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
2955  } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
2956  static const uint16_t MULOpc[] =
2957  { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2958  if (VT == MVT::i8) {
2959  // Copy the first operand into AL, which is an implicit input to the
2960  // X86::IMUL8r instruction.
2961  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2962  TII.get(TargetOpcode::COPY), X86::AL)
2963  .addReg(LHSReg, getKillRegState(LHSIsKill));
2964  ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
2965  RHSIsKill);
2966  } else
2967  ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2968  TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
2969  RHSReg, RHSIsKill);
2970  }
2971 
2972  if (!ResultReg)
2973  return false;
2974 
2975  // Assign to a GPR since the overflow return value is lowered to a SETcc.
2976  unsigned ResultReg2 = createResultReg(&X86::GR8RegClass);
2977  assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2978  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2979  ResultReg2).addImm(CondCode);
2980 
2981  updateValueMap(II, ResultReg, 2);
2982  return true;
2983  }
2984  case Intrinsic::x86_sse_cvttss2si:
2985  case Intrinsic::x86_sse_cvttss2si64:
2986  case Intrinsic::x86_sse2_cvttsd2si:
2987  case Intrinsic::x86_sse2_cvttsd2si64: {
2988  bool IsInputDouble;
2989  switch (II->getIntrinsicID()) {
2990  default: llvm_unreachable("Unexpected intrinsic.");
2991  case Intrinsic::x86_sse_cvttss2si:
2992  case Intrinsic::x86_sse_cvttss2si64:
2993  if (!Subtarget->hasSSE1())
2994  return false;
2995  IsInputDouble = false;
2996  break;
2997  case Intrinsic::x86_sse2_cvttsd2si:
2998  case Intrinsic::x86_sse2_cvttsd2si64:
2999  if (!Subtarget->hasSSE2())
3000  return false;
3001  IsInputDouble = true;
3002  break;
3003  }
3004 
3005  Type *RetTy = II->getCalledFunction()->getReturnType();
3006  MVT VT;
3007  if (!isTypeLegal(RetTy, VT))
3008  return false;
3009 
3010  static const uint16_t CvtOpc[3][2][2] = {
3011  { { X86::CVTTSS2SIrr, X86::CVTTSS2SI64rr },
3012  { X86::CVTTSD2SIrr, X86::CVTTSD2SI64rr } },
3013  { { X86::VCVTTSS2SIrr, X86::VCVTTSS2SI64rr },
3014  { X86::VCVTTSD2SIrr, X86::VCVTTSD2SI64rr } },
3015  { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
3016  { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
3017  };
3018  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
3019  Subtarget->hasAVX() ? 1 :
3020  0;
3021  unsigned Opc;
3022  switch (VT.SimpleTy) {
3023  default: llvm_unreachable("Unexpected result type.");
3024  case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
3025  case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
3026  }
3027 
3028  // Check if we can fold insertelement instructions into the convert.
3029  const Value *Op = II->getArgOperand(0);
3030  while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
3031  const Value *Index = IE->getOperand(2);
3032  if (!isa<ConstantInt>(Index))
3033  break;
3034  unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
3035 
3036  if (Idx == 0) {
3037  Op = IE->getOperand(1);
3038  break;
3039  }
3040  Op = IE->getOperand(0);
3041  }
3042 
3043  unsigned Reg = getRegForValue(Op);
3044  if (Reg == 0)
3045  return false;
3046 
3047  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3048  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3049  .addReg(Reg);
3050 
3051  updateValueMap(II, ResultReg);
3052  return true;
3053  }
3054  }
3055 }
3056 
3057 bool X86FastISel::fastLowerArguments() {
3058  if (!FuncInfo.CanLowerReturn)
3059  return false;
3060 
3061  const Function *F = FuncInfo.Fn;
3062  if (F->isVarArg())
3063  return false;
3064 
3065  CallingConv::ID CC = F->getCallingConv();
3066  if (CC != CallingConv::C)
3067  return false;
3068 
3069  if (Subtarget->isCallingConvWin64(CC))
3070  return false;
3071 
3072  if (!Subtarget->is64Bit())
3073  return false;
3074 
3075  if (Subtarget->useSoftFloat())
3076  return false;
3077 
3078  // Only handle simple cases, i.e. up to 6 i32/i64 scalar arguments.
3079  unsigned GPRCnt = 0;
3080  unsigned FPRCnt = 0;
3081  for (auto const &Arg : F->args()) {
3082  if (Arg.hasAttribute(Attribute::ByVal) ||
3083  Arg.hasAttribute(Attribute::InReg) ||
3084  Arg.hasAttribute(Attribute::StructRet) ||
3085  Arg.hasAttribute(Attribute::SwiftSelf) ||
3086  Arg.hasAttribute(Attribute::SwiftError) ||
3087  Arg.hasAttribute(Attribute::Nest))
3088  return false;
3089 
3090  Type *ArgTy = Arg.getType();
3091  if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3092  return false;
3093 
3094  EVT ArgVT = TLI.getValueType(DL, ArgTy);
3095  if (!ArgVT.isSimple()) return false;
3096  switch (ArgVT.getSimpleVT().SimpleTy) {
3097  default: return false;
3098  case MVT::i32:
3099  case MVT::i64:
3100  ++GPRCnt;
3101  break;
3102  case MVT::f32:
3103  case MVT::f64:
3104  if (!Subtarget->hasSSE1())
3105  return false;
3106  ++FPRCnt;
3107  break;
3108  }
3109 
3110  if (GPRCnt > 6)
3111  return false;
3112 
3113  if (FPRCnt > 8)
3114  return false;
3115  }
3116 
3117  static const MCPhysReg GPR32ArgRegs[] = {
3118  X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
3119  };
3120  static const MCPhysReg GPR64ArgRegs[] = {
3121  X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
3122  };
3123  static const MCPhysReg XMMArgRegs[] = {
3124  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3125  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3126  };
3127 
3128  unsigned GPRIdx = 0;
3129  unsigned FPRIdx = 0;
3130  for (auto const &Arg : F->args()) {
3131  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3132  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
3133  unsigned SrcReg;
3134  switch (VT.SimpleTy) {
3135  default: llvm_unreachable("Unexpected value type.");
3136  case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
3137  case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
3138  case MVT::f32: LLVM_FALLTHROUGH;
3139  case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
3140  }
3141  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3142  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3143  // Without this, EmitLiveInCopies may eliminate the livein if its only
3144  // use is a bitcast (which isn't turned into an instruction).
3145  unsigned ResultReg = createResultReg(RC);
3146  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3147  TII.get(TargetOpcode::COPY), ResultReg)
3148  .addReg(DstReg, getKillRegState(true));
3149  updateValueMap(&Arg, ResultReg);
3150  }
3151  return true;
3152 }
3153 
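// In short, the helper below encodes the 32-bit sret convention used here:
// on x86-32 targets other than MSVCRT and MCU environments, a callee whose
// first argument is an sret pointer (and not inreg) pops that 4-byte pointer
// itself, so the caller must report 4 bytes in CALLSEQ_END; fastcc, GHC,
// HiPE and tail calls never do.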
3154 static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
3155  CallingConv::ID CC,
3156  ImmutableCallSite *CS) {
3157  if (Subtarget->is64Bit())
3158  return 0;
3159  if (Subtarget->getTargetTriple().isOSMSVCRT())
3160  return 0;
3161  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3162  CC == CallingConv::HiPE || CC == CallingConv::Tail)
3163  return 0;
3164 
3165  if (CS)
3166  if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
3167  CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
3168  return 0;
3169 
3170  return 4;
3171 }
3172 
3173 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3174  auto &OutVals = CLI.OutVals;
3175  auto &OutFlags = CLI.OutFlags;
3176  auto &OutRegs = CLI.OutRegs;
3177  auto &Ins = CLI.Ins;
3178  auto &InRegs = CLI.InRegs;
3179  CallingConv::ID CC = CLI.CallConv;
3180  bool &IsTailCall = CLI.IsTailCall;
3181  bool IsVarArg = CLI.IsVarArg;
3182  const Value *Callee = CLI.Callee;
3183  MCSymbol *Symbol = CLI.Symbol;
3184 
3185  bool Is64Bit = Subtarget->is64Bit();
3186  bool IsWin64 = Subtarget->isCallingConvWin64(CC);
3187 
3188  const CallInst *CI =
3189  CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
3190  const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr;
3191 
3192  // Call / invoke instructions with NoCfCheck attribute require special
3193  // handling.
3194  const auto *II =
3195  CLI.CS ? dyn_cast<InvokeInst>(CLI.CS->getInstruction()) : nullptr;
3196  if ((CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck()))
3197  return false;
3198 
3199  // Functions with the no_caller_saved_registers attribute need special handling.
3200  if ((CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3201  (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
3202  return false;
3203 
3204  // Functions using retpoline for indirect calls need to use SDISel.
3205  if (Subtarget->useRetpolineIndirectCalls())
3206  return false;
3207 
3208  // Handle only C, fastcc, and webkit_js calling conventions for now.
3209  switch (CC) {
3210  default: return false;
3211  case CallingConv::C:
3212  case CallingConv::Fast:
3213  case CallingConv::Tail:
3214  case CallingConv::WebKit_JS:
3215  case CallingConv::Swift:
3216  case CallingConv::X86_FastCall:
3217  case CallingConv::X86_StdCall:
3218  case CallingConv::X86_ThisCall:
3219  case CallingConv::Win64:
3220  case CallingConv::X86_64_SysV:
3221  break;
3222  }
3223 
3224  // Allow SelectionDAG isel to handle tail calls.
3225  if (IsTailCall)
3226  return false;
3227 
3228  // fastcc with -tailcallopt is intended to provide a guaranteed
3229  // tail call optimization. Fastisel doesn't know how to do that.
3230  if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
3231  CC == CallingConv::Tail)
3232  return false;
3233 
3234  // Don't know how to handle Win64 varargs yet. Nothing special needed for
3235  // x86-32. Special handling for x86-64 is implemented.
3236  if (IsVarArg && IsWin64)
3237  return false;
3238 
3239  // Don't know about inalloca yet.
3240  if (CLI.CS && CLI.CS->hasInAllocaArgument())
3241  return false;
3242 
3243  for (auto Flag : CLI.OutFlags)
3244  if (Flag.isSwiftError())
3245  return false;
3246 
3247  SmallVector<MVT, 16> OutVTs;
3248  SmallVector<unsigned, 16> ArgRegs;
3249 
3250  // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
3251  // instruction. This is safe because it is common to all FastISel supported
3252  // calling conventions on x86.
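      // For example, passing the constant i8 7 to a callee is rewritten here
      // so that the i32 value 7 (sign- or zero-extended per the argument
      // flags) is materialized with a single 32-bit move, rather than an
      // 8-bit move plus a separate extension.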
3253  for (int i = 0, e = OutVals.size(); i != e; ++i) {
3254  Value *&Val = OutVals[i];
3255  ISD::ArgFlagsTy Flags = OutFlags[i];
3256  if (auto *CI = dyn_cast<ConstantInt>(Val)) {
3257  if (CI->getBitWidth() < 32) {
3258  if (Flags.isSExt())
3259  Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
3260  else
3261  Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
3262  }
3263  }
3264 
3265  // Passing bools around ends up doing a trunc to i1 and passing it.
3266  // Codegen this as an argument + "and 1".
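      // For example, for IR such as
      //   %t = trunc i8 %x to i1
      //   call void @use(i1 %t)
      // the register already holding %x is reused and masked with "and $1"
      // instead of materializing a separate i1 value; the trunc must be in
      // the same block as the call and have a single use, as checked below.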
3267  MVT VT;
3268  auto *TI = dyn_cast<TruncInst>(Val);
3269  unsigned ResultReg;
3270  if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
3271  (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
3272  TI->hasOneUse()) {
3273  Value *PrevVal = TI->getOperand(0);
3274  ResultReg = getRegForValue(PrevVal);
3275 
3276  if (!ResultReg)
3277  return false;
3278 
3279  if (!isTypeLegal(PrevVal->getType(), VT))
3280  return false;
3281 
3282  ResultReg =
3283  fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
3284  } else {
3285  if (!isTypeLegal(Val->getType(), VT))
3286  return false;
3287  ResultReg = getRegForValue(Val);
3288  }
3289 
3290  if (!ResultReg)
3291  return false;
3292 
3293  ArgRegs.push_back(ResultReg);
3294  OutVTs.push_back(VT);
3295  }
3296 
3297  // Analyze operands of the call, assigning locations to each operand.
3298  SmallVector<CCValAssign, 16> ArgLocs;
3299  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
3300 
3301  // Allocate shadow area for Win64
3302  if (IsWin64)
3303  CCInfo.AllocateStack(32, 8);
3304 
3305  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
3306 
3307  // Get a count of how many bytes are to be pushed on the stack.
3308  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3309 
3310  // Issue CALLSEQ_START
3311  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3312  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3313  .addImm(NumBytes).addImm(0).addImm(0);
3314 
3315  // Walk the register/memloc assignments, inserting copies/loads.
3316  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3317  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3318  CCValAssign const &VA = ArgLocs[i];
3319  const Value *ArgVal = OutVals[VA.getValNo()];
3320  MVT ArgVT = OutVTs[VA.getValNo()];
3321 
3322  if (ArgVT == MVT::x86mmx)
3323  return false;
3324 
3325  unsigned ArgReg = ArgRegs[VA.getValNo()];
3326 
3327  // Promote the value if needed.
3328  switch (VA.getLocInfo()) {
3329  case CCValAssign::Full: break;
3330  case CCValAssign::SExt: {
3331  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3332  "Unexpected extend");
3333 
3334  if (ArgVT == MVT::i1)
3335  return false;
3336 
3337  bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3338  ArgVT, ArgReg);
3339  assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
3340  ArgVT = VA.getLocVT();
3341  break;
3342  }
3343  case CCValAssign::ZExt: {
3344  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3345  "Unexpected extend");
3346 
3347  // Handle zero-extension from i1 to i8, which is common.
3348  if (ArgVT == MVT::i1) {
3349  // Set the high bits to zero.
3350  ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
3351  ArgVT = MVT::i8;
3352 
3353  if (ArgReg == 0)
3354  return false;
3355  }
3356 
3357  bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3358  ArgVT, ArgReg);
3359  assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
3360  ArgVT = VA.getLocVT();
3361  break;
3362  }
3363  case CCValAssign::AExt: {
3364  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3365  "Unexpected extend");
3366  bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
3367  ArgVT, ArgReg);
3368  if (!Emitted)
3369  Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3370  ArgVT, ArgReg);
3371  if (!Emitted)
3372  Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3373  ArgVT, ArgReg);
3374 
3375  assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
3376  ArgVT = VA.getLocVT();
3377  break;
3378  }
3379  case CCValAssign::BCvt: {
3380  ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
3381  /*TODO: Kill=*/false);
3382  assert(ArgReg && "Failed to emit a bitcast!");
3383  ArgVT = VA.getLocVT();
3384  break;
3385  }
3386  case CCValAssign::VExt:
3387  // VExt has not been implemented, so this should be impossible to reach
3388  // for now. However, fallback to Selection DAG isel once implemented.
3389  return false;
3390  case CCValAssign::AExtUpper:
3391  case CCValAssign::SExtUpper:
3392  case CCValAssign::ZExtUpper:
3393  case CCValAssign::FPExt:
3394  case CCValAssign::Trunc:
3395  llvm_unreachable("Unexpected loc info!");
3396  case CCValAssign::Indirect:
3397  // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
3398  // support this.
3399  return false;
3400  }
3401 
3402  if (VA.isRegLoc()) {
3403  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3404  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3405  OutRegs.push_back(VA.getLocReg());
3406  } else {
3407  assert(VA.isMemLoc());
3408 
3409  // Don't emit stores for undef values.
3410  if (isa<UndefValue>(ArgVal))
3411  continue;
3412 
3413  unsigned LocMemOffset = VA.getLocMemOffset();
3414  X86AddressMode AM;
3415  AM.Base.Reg = RegInfo->getStackRegister();
3416  AM.Disp = LocMemOffset;
3417  ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
3418  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3419  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3420  MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
3421  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3422  if (Flags.isByVal()) {
3423  X86AddressMode SrcAM;
3424  SrcAM.Base.Reg = ArgReg;
3425  if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
3426  return false;
3427  } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
3428  // If this is a really simple value, emit this with the Value* version
3429  // of X86FastEmitStore. If it isn't simple, we don't want to do this,
3430  // as it can cause us to reevaluate the argument.
3431  if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
3432  return false;
3433  } else {
3434  bool ValIsKill = hasTrivialKill(ArgVal);
3435  if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
3436  return false;
3437  }
3438  }
3439  }
3440 
3441  // ELF / PIC requires the GOT pointer to be in EBX before making a function
3442  // call through the PLT.
3443  if (Subtarget->isPICStyleGOT()) {
3444  unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3445  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3446  TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
3447  }
3448 
3449  if (Is64Bit && IsVarArg && !IsWin64) {
3450  // From AMD64 ABI document:
3451  // For calls that may call functions that use varargs or stdargs
3452  // (prototype-less calls or calls to functions containing ellipsis (...) in
3453  // the declaration) %al is used as hidden argument to specify the number
3454  // of SSE registers used. The contents of %al do not need to match exactly
3455  // the number of registers, but must be an upper bound on the number of SSE
3456  // registers used and is in the range 0 - 8 inclusive.
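      // For example, a vararg call that passes three doubles in XMM0-XMM2
      // gets "movb $3, %al" emitted immediately before the call instruction.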
3457 
3458  // Count the number of XMM registers allocated.
3459  static const MCPhysReg XMMArgRegs[] = {
3460  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3461  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3462  };
3463  unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3464  assert((Subtarget->hasSSE1() || !NumXMMRegs)
3465  && "SSE registers cannot be used when SSE is disabled");
3466  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
3467  X86::AL).addImm(NumXMMRegs);
3468  }
3469 
3470  // Materialize callee address in a register. FIXME: GV address can be
3471  // handled with a CALLpcrel32 instead.
3472  X86AddressMode CalleeAM;
3473  if (!X86SelectCallAddress(Callee, CalleeAM))
3474  return false;
3475 
3476  unsigned CalleeOp = 0;
3477  const GlobalValue *GV = nullptr;
3478  if (CalleeAM.GV != nullptr) {
3479  GV = CalleeAM.GV;
3480  } else if (CalleeAM.Base.Reg != 0) {
3481  CalleeOp = CalleeAM.Base.Reg;
3482  } else
3483  return false;
3484 
3485  // Issue the call.
3486  MachineInstrBuilder MIB;
3487  if (CalleeOp) {
3488  // Register-indirect call.
3489  unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
3490  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
3491  .addReg(CalleeOp);
3492  } else {
3493  // Direct call.
3494  assert(GV && "Not a direct call");
3495  // See if we need any target-specific flags on the GV operand.
3496  unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
3497 
3498  // This will be a direct call, or an indirect call through memory for
3499  // NonLazyBind calls or dllimport calls.
3500  bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT ||
3501  OpFlags == X86II::MO_GOTPCREL ||
3502  OpFlags == X86II::MO_COFFSTUB;
3503  unsigned CallOpc = NeedLoad
3504  ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
3505  : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
3506 
3507  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
3508  if (NeedLoad)
3509  MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
3510  if (Symbol)
3511  MIB.addSym(Symbol, OpFlags);
3512  else
3513  MIB.addGlobalAddress(GV, 0, OpFlags);
3514  if (NeedLoad)
3515  MIB.addReg(0);
3516  }
3517 
3518  // Add a register mask operand representing the call-preserved registers.
3519  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3520  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3521 
3522  // Add an implicit use GOT pointer in EBX.
3523  if (Subtarget->isPICStyleGOT())
3524  MIB.addReg(X86::EBX, RegState::Implicit);
3525 
3526  if (Is64Bit && IsVarArg && !IsWin64)
3527  MIB.addReg(X86::AL, RegState::Implicit);
3528 
3529  // Add implicit physical register uses to the call.
3530  for (auto Reg : OutRegs)
3531  MIB.addReg(Reg, RegState::Implicit);
3532 
3533  // Issue CALLSEQ_END
3534  unsigned NumBytesForCalleeToPop =
3535  X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
3536  TM.Options.GuaranteedTailCallOpt)
3537  ? NumBytes // Callee pops everything.
3538  : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CS);
3539  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3540  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3541  .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
3542 
3543  // Now handle call return values.
3544  SmallVector<CCValAssign, 16> RVLocs;
3545  CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
3546  CLI.RetTy->getContext());
3547  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
3548 
3549  // Copy all of the result registers out of their specified physreg.
3550  unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3551  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3552  CCValAssign &VA = RVLocs[i];
3553  EVT CopyVT = VA.getValVT();
3554  unsigned CopyReg = ResultReg + i;
3555  Register SrcReg = VA.getLocReg();
3556 
3557  // If this is x86-64, and we disabled SSE, we can't return FP values
3558  if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
3559  ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
3560  report_fatal_error("SSE register return with SSE disabled");
3561  }
3562 
3563  // If we prefer to use the value in xmm registers, copy it out as f80 and
3564  // use a truncate to move it from fp stack reg to xmm reg.
3565  if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
3566  isScalarFPTypeInSSEReg(VA.getValVT())) {
3567  CopyVT = MVT::f80;
3568  CopyReg = createResultReg(&X86::RFP80RegClass);
3569  }
3570 
3571  // Copy out the result.
3572  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3573  TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
3574  InRegs.push_back(VA.getLocReg());
3575 
3576  // Round the f80 to the right size, which also moves it to the appropriate
3577  // xmm register. This is accomplished by storing the f80 value in memory
3578  // and then loading it back.
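      // For example, an f32 value returned in FP0 is stored with ST_Fp80m32
      // (rounding the 80-bit value down to 32 bits) into a 4-byte stack slot
      // and then reloaded with MOVSS, leaving the result in an SSE register.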
3579  if (CopyVT != VA.getValVT()) {
3580  EVT ResVT = VA.getValVT();
3581  unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3582  unsigned MemSize = ResVT.getSizeInBits()/8;
3583  int FI = MFI.CreateStackObject(MemSize, MemSize, false);
3584  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3585  TII.get(Opc)), FI)
3586  .addReg(CopyReg);
3587  Opc = ResVT == MVT::f32 ? X86::MOVSSrm_alt : X86::MOVSDrm_alt;
3588  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3589  TII.get(Opc), ResultReg + i), FI);
3590  }
3591  }
3592 
3593  CLI.ResultReg = ResultReg;
3594  CLI.NumResultRegs = RVLocs.size();
3595  CLI.Call = MIB;
3596 
3597  return true;
3598 }
3599 
3600 bool
3601 X86FastISel::fastSelectInstruction(const Instruction *I) {
3602  switch (I->getOpcode()) {
3603  default: break;
3604  case Instruction::Load:
3605  return X86SelectLoad(I);
3606  case Instruction::Store:
3607  return X86SelectStore(I);
3608  case Instruction::Ret:
3609  return X86SelectRet(I);
3610  case Instruction::ICmp:
3611  case Instruction::FCmp:
3612  return X86SelectCmp(I);
3613  case Instruction::ZExt:
3614  return X86SelectZExt(I);
3615  case Instruction::SExt:
3616  return X86SelectSExt(I);
3617  case Instruction::Br:
3618  return X86SelectBranch(I);
3619  case Instruction::LShr:
3620  case Instruction::AShr:
3621  case Instruction::Shl:
3622  return X86SelectShift(I);
3623  case Instruction::SDiv:
3624  case Instruction::UDiv:
3625  case Instruction::SRem:
3626  case Instruction::URem:
3627  return X86SelectDivRem(I);
3628  case Instruction::Select:
3629  return X86SelectSelect(I);
3630  case Instruction::Trunc:
3631  return X86SelectTrunc(I);
3632  case Instruction::FPExt:
3633  return X86SelectFPExt(I);
3634  case Instruction::FPTrunc:
3635  return X86SelectFPTrunc(I);
3636  case Instruction::SIToFP:
3637  return X86SelectSIToFP(I);
3638  case Instruction::UIToFP:
3639  return X86SelectUIToFP(I);
3640  case Instruction::IntToPtr: // Deliberate fall-through.
3641  case Instruction::PtrToInt: {
3642  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3643  EVT DstVT = TLI.getValueType(DL, I->getType());
3644  if (DstVT.bitsGT(SrcVT))
3645  return X86SelectZExt(I);
3646  if (DstVT.bitsLT(SrcVT))
3647  return X86SelectTrunc(I);
3648  unsigned Reg = getRegForValue(I->getOperand(0));
3649  if (Reg == 0) return false;
3650  updateValueMap(I, Reg);
3651  return true;
3652  }
3653  case Instruction::BitCast: {
3654  // Select SSE2/AVX bitcasts between 128/256/512 bit vector types.
3655  if (!Subtarget->hasSSE2())
3656  return false;
3657 
3658  MVT SrcVT, DstVT;
3659  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT) ||
3660  !isTypeLegal(I->getType(), DstVT))
3661  return false;
3662 
3663  // Only allow vectors that use xmm/ymm/zmm.
3664  if (!SrcVT.isVector() || !DstVT.isVector() ||
3665  SrcVT.getVectorElementType() == MVT::i1 ||
3666  DstVT.getVectorElementType() == MVT::i1)
3667  return false;
3668 
3669  unsigned Reg = getRegForValue(I->getOperand(0));
3670  if (Reg == 0)
3671  return false;
3672 
3673  // No instruction is needed for conversion. Reuse the register used by
3674  // the first operand.
3675  updateValueMap(I, Reg);
3676  return true;
3677  }
3678  }
3679 
3680  return false;
3681 }
3682 
3683 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
3684  if (VT > MVT::i64)
3685  return 0;
3686 
3687  uint64_t Imm = CI->getZExtValue();
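    // Note: a zero immediate is materialized below with MOV32r0 (which
    // expands to "xor reg, reg"); i1/i8/i16 zeros are then carved out of that
    // GR32 with subregister copies, and an i64 zero is formed with
    // SUBREG_TO_REG, since a 32-bit zero implicitly clears the upper 32 bits.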
3688  if (Imm == 0) {
3689  unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
3690  switch (VT.SimpleTy) {
3691  default: llvm_unreachable("Unexpected value type");
3692  case MVT::i1:
3693  case MVT::i8:
3694  return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
3695  X86::sub_8bit);
3696  case MVT::i16:
3697  return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
3698  X86::sub_16bit);
3699  case MVT::i32:
3700  return SrcReg;
3701  case MVT::i64: {
3702  unsigned ResultReg = createResultReg(&X86::GR64RegClass);
3703  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3704  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3705  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3706  return ResultReg;
3707  }
3708  }
3709  }
3710 
3711  unsigned Opc = 0;
3712  switch (VT.SimpleTy) {
3713  default: llvm_unreachable("Unexpected value type");
3714  case MVT::i1:
3715  VT = MVT::i8;
3716  LLVM_FALLTHROUGH;
3717  case MVT::i8: Opc = X86::MOV8ri; break;
3718  case MVT::i16: Opc = X86::MOV16ri; break;
3719  case MVT::i32: Opc = X86::MOV32ri; break;
3720  case MVT::i64: {
3721  if (isUInt<32>(Imm))
3722  Opc = X86::MOV32ri64;
3723  else if (isInt<32>(Imm))
3724  Opc = X86::MOV64ri32;
3725  else
3726  Opc = X86::MOV64ri;
3727  break;
3728  }
3729  }
3730  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
3731 }
3732 
3733 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
3734  if (CFP->isNullValue())
3735  return fastMaterializeFloatZero(CFP);
3736 
3737  // Can't handle alternate code models yet.
3738  CodeModel::Model CM = TM.getCodeModel();
3739  if (CM != CodeModel::Small && CM != CodeModel::Large)
3740  return 0;
3741 
3742  // Get opcode and regclass of the output for the given load instruction.
3743  unsigned Opc = 0;
3744  bool HasAVX = Subtarget->hasAVX();
3745  bool HasAVX512 = Subtarget->hasAVX512();
3746  switch (VT.SimpleTy) {
3747  default: return 0;
3748  case MVT::f32:
3749  if (X86ScalarSSEf32)
3750  Opc = HasAVX512 ? X86::VMOVSSZrm_alt :
3751  HasAVX ? X86::VMOVSSrm_alt :
3752  X86::MOVSSrm_alt;
3753  else
3754  Opc = X86::LD_Fp32m;
3755  break;
3756  case MVT::f64:
3757  if (X86ScalarSSEf64)
3758  Opc = HasAVX512 ? X86::VMOVSDZrm_alt :
3759  HasAVX ? X86::VMOVSDrm_alt :
3760  X86::MOVSDrm_alt;
3761  else
3762  Opc = X86::LD_Fp64m;
3763  break;
3764  case MVT::f80:
3765  // No f80 support yet.
3766  return 0;
3767  }
3768 
3769  // MachineConstantPool wants an explicit alignment.
3770  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
3771  if (Align == 0) {
3772  // Alignment of vector types. FIXME!
3773  Align = DL.getTypeAllocSize(CFP->getType());
3774  }
3775 
3776  // x86-32 PIC requires a PIC base register for constant pools.
3777  unsigned PICBase = 0;
3778  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
3779  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
3780  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3781  else if (OpFlag == X86II::MO_GOTOFF)
3782  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3783  else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
3784  PICBase = X86::RIP;
3785 
3786  // Create the load from the constant pool.
3787  unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
3788  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
3789 
3790  if (CM == CodeModel::Large) {
3791  unsigned AddrReg = createResultReg(&X86::GR64RegClass);
3792  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3793  AddrReg)
3794  .addConstantPoolIndex(CPI, 0, OpFlag);
3795  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3796  TII.get(Opc), ResultReg);
3797  addDirectMem(MIB, AddrReg);
3798  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3799  MachinePointerInfo::getConstantPool(*FuncInfo.MF),
3800  MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
3801  MIB->addMemOperand(*FuncInfo.MF, MMO);
3802  return ResultReg;
3803  }
3804 
3805  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3806  TII.get(Opc), ResultReg),
3807  CPI, PICBase, OpFlag);
3808  return ResultReg;
3809 }
3810 
3811 unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
3812  // Can't handle alternate code models yet.
3813  if (TM.getCodeModel() != CodeModel::Small)
3814  return 0;
3815 
3816  // Materialize addresses with LEA/MOV instructions.
3817  X86AddressMode AM;
3818  if (X86SelectAddress(GV, AM)) {
3819  // If the expression is just a basereg, then we're done, otherwise we need
3820  // to emit an LEA.
3821  if (AM.BaseType == X86AddressMode::RegBase &&
3822  AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
3823  return AM.Base.Reg;
3824 
3825  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3826  if (TM.getRelocationModel() == Reloc::Static &&
3827  TLI.getPointerTy(DL) == MVT::i64) {
3828  // The displacement code could be more than 32 bits away so we need to use
3829  // an instruction with a 64 bit immediate
3830  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3831  ResultReg)
3832  .addGlobalAddress(GV);
3833  } else {
3834  unsigned Opc =
3835  TLI.getPointerTy(DL) == MVT::i32
3836  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3837  : X86::LEA64r;
3838  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3839  TII.get(Opc), ResultReg), AM);
3840  }
3841  return ResultReg;
3842  }
3843  return 0;
3844 }
3845 
3846 unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
3847  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
3848 
3849  // Only handle simple types.
3850  if (!CEVT.isSimple())
3851  return 0;
3852  MVT VT = CEVT.getSimpleVT();
3853 
3854  if (const auto *CI = dyn_cast<ConstantInt>(C))
3855  return X86MaterializeInt(CI, VT);
3856  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
3857  return X86MaterializeFP(CFP, VT);
3858  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
3859  return X86MaterializeGV(GV, VT);
3860 
3861  return 0;
3862 }
3863 
3864 unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
3865  // Fail on dynamic allocas. At this point, getRegForValue has already
3866  // checked its CSE maps, so if we're here trying to handle a dynamic
3867  // alloca, we're not going to succeed. X86SelectAddress has a
3868  // check for dynamic allocas, because it's called directly from
3869  // various places, but targetMaterializeAlloca also needs a check
3870  // in order to avoid recursion between getRegForValue,
3871  // X86SelectAddress, and targetMaterializeAlloca.
3872  if (!FuncInfo.StaticAllocaMap.count(C))
3873  return 0;
3874  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
3875 
3876  X86AddressMode AM;
3877  if (!X86SelectAddress(C, AM))
3878  return 0;
3879  unsigned Opc =
3880  TLI.getPointerTy(DL) == MVT::i32
3881  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3882  : X86::LEA64r;
3883  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
3884  unsigned ResultReg = createResultReg(RC);
3885  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3886  TII.get(Opc), ResultReg), AM);
3887  return ResultReg;
3888 }
3889 
3890 unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
3891  MVT VT;
3892  if (!isTypeLegal(CF->getType(), VT))
3893  return 0;
3894 
3895  // Get opcode and regclass for the given zero.
3896  bool HasAVX512 = Subtarget->hasAVX512();
3897  unsigned Opc = 0;
3898  switch (VT.SimpleTy) {
3899  default: return 0;
3900  case MVT::f32:
3901  if (X86ScalarSSEf32)
3902  Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS;
3903  else
3904  Opc = X86::LD_Fp032;
3905  break;
3906  case MVT::f64:
3907  if (X86ScalarSSEf64)
3908  Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD;
3909  else
3910  Opc = X86::LD_Fp064;
3911  break;
3912  case MVT::f80:
3913  // No f80 support yet.
3914  return 0;
3915  }
3916 
3917  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3918  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
3919  return ResultReg;
3920 }
3921 
3922 
3923 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
3924  const LoadInst *LI) {
3925  const Value *Ptr = LI->getPointerOperand();
3926  X86AddressMode AM;
3927  if (!X86SelectAddress(Ptr, AM))
3928  return false;
3929 
3930  const X86InstrInfo &XII = (const X86InstrInfo &)TII;
3931 
3932  unsigned Size = DL.getTypeAllocSize(LI->getType());
3933  unsigned Alignment = LI->getAlignment();
3934 
3935  if (Alignment == 0) // Ensure that codegen never sees alignment 0
3936  Alignment = DL.getABITypeAlignment(LI->getType());
3937 
3938  SmallVector<MachineOperand, 8> AddrOps;
3939  AM.getFullAddress(AddrOps);
3940 
3941  MachineInstr *Result = XII.foldMemoryOperandImpl(
3942  *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
3943  /*AllowCommute=*/true);
3944  if (!Result)
3945  return false;
3946 
3947  // The index register could be in the wrong register class. Unfortunately,
3948  // foldMemoryOperandImpl could have commuted the instruction so it's not enough
3949  // to just look at OpNo + the offset to the index reg. We actually need to
3950  // scan the instruction to find the index reg and see if it's the correct reg
3951  // class.
3952  unsigned OperandNo = 0;
3953  for (MachineInstr::mop_iterator I = Result->operands_begin(),
3954  E = Result->operands_end(); I != E; ++I, ++OperandNo) {
3955  MachineOperand &MO = *I;
3956  if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
3957  continue;
3958  // Found the index reg, now try to rewrite it.
3959  unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
3960  MO.getReg(), OperandNo);
3961  if (IndexReg == MO.getReg())
3962  continue;
3963  MO.setReg(IndexReg);
3964  }
3965 
3966  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
3967  Result->cloneInstrSymbols(*FuncInfo.MF, *MI);
3968  MachineBasicBlock::iterator I(MI);
3969  removeDeadCode(I, std::next(I));
3970  return true;
3971 }
3972 
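// A four-register analogue of the generic FastISel::fastEmitInst_* helpers:
// each operand is constrained to the register class required by the opcode's
// MCInstrDesc, and if the instruction has no explicit definitions the result
// is copied out of its first implicit def instead.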
3973 unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
3974  const TargetRegisterClass *RC,
3975  unsigned Op0, bool Op0IsKill,
3976  unsigned Op1, bool Op1IsKill,
3977  unsigned Op2, bool Op2IsKill,
3978  unsigned Op3, bool Op3IsKill) {
3979  const MCInstrDesc &II = TII.get(MachineInstOpcode);
3980 
3981  unsigned ResultReg = createResultReg(RC);
3982  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
3983  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
3984  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
3985  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
3986 
3987  if (II.getNumDefs() >= 1)
3988  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
3989  .addReg(Op0, getKillRegState(Op0IsKill))
3990  .addReg(Op1, getKillRegState(Op1IsKill))
3991  .addReg(Op2, getKillRegState(Op2IsKill))
3992  .addReg(Op3, getKillRegState(Op3IsKill));
3993  else {
3994  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
3995  .addReg(Op0, getKillRegState(Op0IsKill))
3996  .addReg(Op1, getKillRegState(Op1IsKill))
3997  .addReg(Op2, getKillRegState(Op2IsKill))
3998  .addReg(Op3, getKillRegState(Op3IsKill));
3999  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4000  TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
4001  }
4002  return ResultReg;
4003 }
4004 
4005 
4006 namespace llvm {
4007  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
4008  const TargetLibraryInfo *libInfo) {
4009  return new X86FastISel(funcInfo, libInfo);
4010  }
4011 }