X86FastISel.cpp
1 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the X86-specific support for the FastISel class. Much
10 // of the target-specific code is generated by tablegen in the file
11 // X86GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86CallingConv.h"
17 #include "X86InstrBuilder.h"
18 #include "X86InstrInfo.h"
19 #include "X86MachineFunctionInfo.h"
20 #include "X86RegisterInfo.h"
21 #include "X86Subtarget.h"
22 #include "X86TargetMachine.h"
23 #include "llvm/Analysis/BranchProbabilityInfo.h"
24 #include "llvm/CodeGen/FastISel.h"
25 #include "llvm/CodeGen/FunctionLoweringInfo.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFrameInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/IR/CallSite.h"
30 #include "llvm/IR/CallingConv.h"
31 #include "llvm/IR/DebugInfo.h"
32 #include "llvm/IR/DerivedTypes.h"
33 #include "llvm/IR/GetElementPtrTypeIterator.h"
34 #include "llvm/IR/GlobalAlias.h"
35 #include "llvm/IR/GlobalVariable.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicInst.h"
38 #include "llvm/IR/Operator.h"
39 #include "llvm/MC/MCAsmInfo.h"
40 #include "llvm/MC/MCSymbol.h"
41 #include "llvm/Target/TargetOptions.h"
42 
43 using namespace llvm;
44 
45 namespace {
46 
47 class X86FastISel final : public FastISel {
48  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
49  /// make the right decision when generating code for different targets.
50  const X86Subtarget *Subtarget;
51 
52  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
53  /// floating point ops.
54  /// When SSE is available, use it for f32 operations.
55  /// When SSE2 is available, use it for f64 operations.
56  bool X86ScalarSSEf64;
57  bool X86ScalarSSEf32;
58 
59 public:
60  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
61  const TargetLibraryInfo *libInfo)
62  : FastISel(funcInfo, libInfo) {
63  Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
64  X86ScalarSSEf64 = Subtarget->hasSSE2();
65  X86ScalarSSEf32 = Subtarget->hasSSE1();
66  }
67 
68  bool fastSelectInstruction(const Instruction *I) override;
69 
70  /// The specified machine instr operand is a vreg, and that
71  /// vreg is being provided by the specified load instruction. If possible,
72  /// try to fold the load as an operand to the instruction, returning true on
73  /// success.
74  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
75  const LoadInst *LI) override;
76 
77  bool fastLowerArguments() override;
78  bool fastLowerCall(CallLoweringInfo &CLI) override;
79  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
80 
81 #include "X86GenFastISel.inc"
82 
83 private:
84  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
85  const DebugLoc &DL);
86 
87  bool X86FastEmitLoad(MVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
88  unsigned &ResultReg, unsigned Alignment = 1);
89 
90  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
91  MachineMemOperand *MMO = nullptr, bool Aligned = false);
92  bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
93  X86AddressMode &AM,
94  MachineMemOperand *MMO = nullptr, bool Aligned = false);
95 
96  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
97  unsigned &ResultReg);
98 
99  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
100  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
101 
102  bool X86SelectLoad(const Instruction *I);
103 
104  bool X86SelectStore(const Instruction *I);
105 
106  bool X86SelectRet(const Instruction *I);
107 
108  bool X86SelectCmp(const Instruction *I);
109 
110  bool X86SelectZExt(const Instruction *I);
111 
112  bool X86SelectSExt(const Instruction *I);
113 
114  bool X86SelectBranch(const Instruction *I);
115 
116  bool X86SelectShift(const Instruction *I);
117 
118  bool X86SelectDivRem(const Instruction *I);
119 
120  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
121 
122  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
123 
124  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
125 
126  bool X86SelectSelect(const Instruction *I);
127 
128  bool X86SelectTrunc(const Instruction *I);
129 
130  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
131  const TargetRegisterClass *RC);
132 
133  bool X86SelectFPExt(const Instruction *I);
134  bool X86SelectFPTrunc(const Instruction *I);
135  bool X86SelectSIToFP(const Instruction *I);
136  bool X86SelectUIToFP(const Instruction *I);
137  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);
138 
139  const X86InstrInfo *getInstrInfo() const {
140  return Subtarget->getInstrInfo();
141  }
142  const X86TargetMachine *getTargetMachine() const {
143  return static_cast<const X86TargetMachine *>(&TM);
144  }
145 
146  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
147 
148  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
149  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
150  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
151  unsigned fastMaterializeConstant(const Constant *C) override;
152 
153  unsigned fastMaterializeAlloca(const AllocaInst *C) override;
154 
155  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
156 
157  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
158  /// computed in an SSE register, not on the X87 floating point stack.
159  bool isScalarFPTypeInSSEReg(EVT VT) const {
160  return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
161  (VT == MVT::f32 && X86ScalarSSEf32); // f32 when SSE1 is available
162  }
163 
164  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
165 
166  bool IsMemcpySmall(uint64_t Len);
167 
168  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
169  X86AddressMode SrcAM, uint64_t Len);
170 
171  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
172  const Value *Cond);
173 
174  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
175  X86AddressMode &AM);
176 
177  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
178  const TargetRegisterClass *RC, unsigned Op0,
179  bool Op0IsKill, unsigned Op1, bool Op1IsKill,
180  unsigned Op2, bool Op2IsKill, unsigned Op3,
181  bool Op3IsKill);
182 };
183 
184 } // end anonymous namespace.
185 
186 static std::pair<unsigned, bool>
187 getX86SSEConditionCode(CmpInst::Predicate Predicate) {
188  unsigned CC;
189  bool NeedSwap = false;
190 
191  // SSE Condition code mapping:
192  // 0 - EQ
193  // 1 - LT
194  // 2 - LE
195  // 3 - UNORD
196  // 4 - NEQ
197  // 5 - NLT
198  // 6 - NLE
199  // 7 - ORD
200  switch (Predicate) {
201  default: llvm_unreachable("Unexpected predicate");
202  case CmpInst::FCMP_OEQ: CC = 0; break;
203  case CmpInst::FCMP_OGT: NeedSwap = true; LLVM_FALLTHROUGH;
204  case CmpInst::FCMP_OLT: CC = 1; break;
205  case CmpInst::FCMP_OGE: NeedSwap = true; LLVM_FALLTHROUGH;
206  case CmpInst::FCMP_OLE: CC = 2; break;
207  case CmpInst::FCMP_UNO: CC = 3; break;
208  case CmpInst::FCMP_UNE: CC = 4; break;
209  case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH;
210  case CmpInst::FCMP_UGE: CC = 5; break;
211  case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
212  case CmpInst::FCMP_UGT: CC = 6; break;
213  case CmpInst::FCMP_ORD: CC = 7; break;
214  case CmpInst::FCMP_UEQ: CC = 8; break;
215  case CmpInst::FCMP_ONE: CC = 12; break;
216  }
217 
218  return std::make_pair(CC, NeedSwap);
219 }
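// For example, an IR compare like "fcmp ogt float %a, %b" (illustrative
// operand names) has no direct "greater than" slot in the 3-bit table above,
// so NeedSwap is set and the operands are exchanged, letting the LT encoding
// (1) be used instead. The values 8 and 12 returned for FCMP_UEQ and FCMP_ONE
// fall outside the 3-bit range and rely on the extended AVX compare
// immediates (EQ_UQ and NEQ_OQ).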
220 
221 /// Adds a complex addressing mode to the given machine instr builder.
222 /// Note, this will constrain the index register. If it's not possible to
223 /// constrain the given index register, then a new one will be created. The
224 /// IndexReg field of the addressing mode will be updated to match in this case.
225 const MachineInstrBuilder &
226 X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
227  X86AddressMode &AM) {
228  // First constrain the index register. It needs to be a GR64_NOSP.
229  AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
230  MIB->getNumOperands() +
231  X86::AddrIndexReg);
232  return ::addFullAddress(MIB, AM);
233 }
234 
235 /// Check if it is possible to fold the condition from the XALU intrinsic
236 /// into the user. The condition code will only be updated on success.
237 bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
238  const Value *Cond) {
239  if (!isa<ExtractValueInst>(Cond))
240  return false;
241 
242  const auto *EV = cast<ExtractValueInst>(Cond);
243  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
244  return false;
245 
246  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
247  MVT RetVT;
248  const Function *Callee = II->getCalledFunction();
249  Type *RetTy =
250  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
251  if (!isTypeLegal(RetTy, RetVT))
252  return false;
253 
254  if (RetVT != MVT::i32 && RetVT != MVT::i64)
255  return false;
256 
257  X86::CondCode TmpCC;
258  switch (II->getIntrinsicID()) {
259  default: return false;
260  case Intrinsic::sadd_with_overflow:
261  case Intrinsic::ssub_with_overflow:
262  case Intrinsic::smul_with_overflow:
263  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
264  case Intrinsic::uadd_with_overflow:
265  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
266  }
267 
268  // Check if both instructions are in the same basic block.
269  if (II->getParent() != I->getParent())
270  return false;
271 
272  // Make sure nothing is in the way
273  BasicBlock::const_iterator Start(I);
274  BasicBlock::const_iterator End(II);
275  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
276  // We only expect extractvalue instructions between the intrinsic and the
277  // instruction to be selected.
278  if (!isa<ExtractValueInst>(Itr))
279  return false;
280 
281  // Check that the extractvalue operand comes from the intrinsic.
282  const auto *EVI = cast<ExtractValueInst>(Itr);
283  if (EVI->getAggregateOperand() != II)
284  return false;
285  }
286 
287  CC = TmpCC;
288  return true;
289 }
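// A typical pattern this fold applies to looks like the following IR sketch
// (value names are illustrative):
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %sum = extractvalue { i32, i1 } %res, 0
//   %ovf = extractvalue { i32, i1 } %res, 1
//   br i1 %ovf, label %overflow, label %cont
// The overflow bit can then be consumed directly as COND_O by the branch,
// provided the intrinsic and its user are in the same basic block and only
// extractvalue instructions sit between them, as verified above.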
290 
291 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
292  EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
293  if (evt == MVT::Other || !evt.isSimple())
294  // Unhandled type. Halt "fast" selection and bail.
295  return false;
296 
297  VT = evt.getSimpleVT();
298  // For now, require SSE/SSE2 for performing floating-point operations,
299  // since x87 requires additional work.
300  if (VT == MVT::f64 && !X86ScalarSSEf64)
301  return false;
302  if (VT == MVT::f32 && !X86ScalarSSEf32)
303  return false;
304  // Similarly, no f80 support yet.
305  if (VT == MVT::f80)
306  return false;
307  // We only handle legal types. For example, on x86-32 the instruction
308  // selector contains all of the 64-bit instructions from x86-64,
309  // under the assumption that i64 won't be used if the target doesn't
310  // support it.
311  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
312 }
313 
314 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
315 /// The address to load from is described by the addressing mode AM.
316 /// Return true and set ResultReg by reference if it is possible.
317 bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM,
318  MachineMemOperand *MMO, unsigned &ResultReg,
319  unsigned Alignment) {
320  bool HasSSE41 = Subtarget->hasSSE41();
321  bool HasAVX = Subtarget->hasAVX();
322  bool HasAVX2 = Subtarget->hasAVX2();
323  bool HasAVX512 = Subtarget->hasAVX512();
324  bool HasVLX = Subtarget->hasVLX();
325  bool IsNonTemporal = MMO && MMO->isNonTemporal();
326 
327  // Treat i1 loads the same as i8 loads. Masking will be done when storing.
328  if (VT == MVT::i1)
329  VT = MVT::i8;
330 
331  // Get opcode and regclass of the output for the given load instruction.
332  unsigned Opc = 0;
333  switch (VT.SimpleTy) {
334  default: return false;
335  case MVT::i8:
336  Opc = X86::MOV8rm;
337  break;
338  case MVT::i16:
339  Opc = X86::MOV16rm;
340  break;
341  case MVT::i32:
342  Opc = X86::MOV32rm;
343  break;
344  case MVT::i64:
345  // Must be in x86-64 mode.
346  Opc = X86::MOV64rm;
347  break;
348  case MVT::f32:
349  if (X86ScalarSSEf32)
350  Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
351  else
352  Opc = X86::LD_Fp32m;
353  break;
354  case MVT::f64:
355  if (X86ScalarSSEf64)
356  Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
357  else
358  Opc = X86::LD_Fp64m;
359  break;
360  case MVT::f80:
361  // No f80 support yet.
362  return false;
363  case MVT::v4f32:
364  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
365  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
366  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
367  else if (Alignment >= 16)
368  Opc = HasVLX ? X86::VMOVAPSZ128rm :
369  HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
370  else
371  Opc = HasVLX ? X86::VMOVUPSZ128rm :
372  HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
373  break;
374  case MVT::v2f64:
375  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
376  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
377  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
378  else if (Alignment >= 16)
379  Opc = HasVLX ? X86::VMOVAPDZ128rm :
380  HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
381  else
382  Opc = HasVLX ? X86::VMOVUPDZ128rm :
383  HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
384  break;
385  case MVT::v4i32:
386  case MVT::v2i64:
387  case MVT::v8i16:
388  case MVT::v16i8:
389  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
390  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
391  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
392  else if (Alignment >= 16)
393  Opc = HasVLX ? X86::VMOVDQA64Z128rm :
394  HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
395  else
396  Opc = HasVLX ? X86::VMOVDQU64Z128rm :
397  HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
398  break;
399  case MVT::v8f32:
400  assert(HasAVX);
401  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
402  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
403  else if (IsNonTemporal && Alignment >= 16)
404  return false; // Force split for X86::VMOVNTDQArm
405  else if (Alignment >= 32)
406  Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
407  else
408  Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
409  break;
410  case MVT::v4f64:
411  assert(HasAVX);
412  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
413  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
414  else if (IsNonTemporal && Alignment >= 16)
415  return false; // Force split for X86::VMOVNTDQArm
416  else if (Alignment >= 32)
417  Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
418  else
419  Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
420  break;
421  case MVT::v8i32:
422  case MVT::v4i64:
423  case MVT::v16i16:
424  case MVT::v32i8:
425  assert(HasAVX);
426  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
427  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
428  else if (IsNonTemporal && Alignment >= 16)
429  return false; // Force split for X86::VMOVNTDQArm
430  else if (Alignment >= 32)
431  Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
432  else
433  Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
434  break;
435  case MVT::v16f32:
436  assert(HasAVX512);
437  if (IsNonTemporal && Alignment >= 64)
438  Opc = X86::VMOVNTDQAZrm;
439  else
440  Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
441  break;
442  case MVT::v8f64:
443  assert(HasAVX512);
444  if (IsNonTemporal && Alignment >= 64)
445  Opc = X86::VMOVNTDQAZrm;
446  else
447  Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
448  break;
449  case MVT::v8i64:
450  case MVT::v16i32:
451  case MVT::v32i16:
452  case MVT::v64i8:
453  assert(HasAVX512);
454  // Note: There are a lot more choices based on type with AVX-512, but
455  // there's really no advantage when the load isn't masked.
456  if (IsNonTemporal && Alignment >= 64)
457  Opc = X86::VMOVNTDQAZrm;
458  else
459  Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
460  break;
461  }
462 
463  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
464 
465  ResultReg = createResultReg(RC);
466  MachineInstrBuilder MIB =
467  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
468  addFullAddress(MIB, AM);
469  if (MMO)
470  MIB->addMemOperand(*FuncInfo.MF, MMO);
471  return true;
472 }
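// As an example of the selection above: a 16-byte-aligned load of <4 x float>
// picks MOVAPSrm (VMOVAPSrm with AVX, VMOVAPSZ128rm with VLX), an unaligned
// load of the same type falls back to MOVUPSrm, and a non-temporal-hinted,
// aligned load with SSE4.1 available uses MOVNTDQArm instead.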
473 
474 /// X86FastEmitStore - Emit a machine instruction to store a value Val of
475 /// type VT. The destination address is described by the addressing mode AM,
476 /// i.e. a base pointer plus a displacement offset, or a GlobalAddress.
477 /// Return true if it is possible.
478 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
479  X86AddressMode &AM,
480  MachineMemOperand *MMO, bool Aligned) {
481  bool HasSSE1 = Subtarget->hasSSE1();
482  bool HasSSE2 = Subtarget->hasSSE2();
483  bool HasSSE4A = Subtarget->hasSSE4A();
484  bool HasAVX = Subtarget->hasAVX();
485  bool HasAVX512 = Subtarget->hasAVX512();
486  bool HasVLX = Subtarget->hasVLX();
487  bool IsNonTemporal = MMO && MMO->isNonTemporal();
488 
489  // Get opcode and regclass of the output for the given store instruction.
490  unsigned Opc = 0;
491  switch (VT.getSimpleVT().SimpleTy) {
492  case MVT::f80: // No f80 support yet.
493  default: return false;
494  case MVT::i1: {
495  // Mask out all but lowest bit.
496  unsigned AndResult = createResultReg(&X86::GR8RegClass);
497  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
498  TII.get(X86::AND8ri), AndResult)
499  .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
500  ValReg = AndResult;
501  LLVM_FALLTHROUGH; // handle i1 as i8.
502  }
503  case MVT::i8: Opc = X86::MOV8mr; break;
504  case MVT::i16: Opc = X86::MOV16mr; break;
505  case MVT::i32:
506  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
507  break;
508  case MVT::i64:
509  // Must be in x86-64 mode.
510  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
511  break;
512  case MVT::f32:
513  if (X86ScalarSSEf32) {
514  if (IsNonTemporal && HasSSE4A)
515  Opc = X86::MOVNTSS;
516  else
517  Opc = HasAVX512 ? X86::VMOVSSZmr :
518  HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
519  } else
520  Opc = X86::ST_Fp32m;
521  break;
522  case MVT::f64:
523  if (X86ScalarSSEf64) {
524  if (IsNonTemporal && HasSSE4A)
525  Opc = X86::MOVNTSD;
526  else
527  Opc = HasAVX512 ? X86::VMOVSDZmr :
528  HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
529  } else
530  Opc = X86::ST_Fp64m;
531  break;
532  case MVT::x86mmx:
533  Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
534  break;
535  case MVT::v4f32:
536  if (Aligned) {
537  if (IsNonTemporal)
538  Opc = HasVLX ? X86::VMOVNTPSZ128mr :
539  HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
540  else
541  Opc = HasVLX ? X86::VMOVAPSZ128mr :
542  HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
543  } else
544  Opc = HasVLX ? X86::VMOVUPSZ128mr :
545  HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
546  break;
547  case MVT::v2f64:
548  if (Aligned) {
549  if (IsNonTemporal)
550  Opc = HasVLX ? X86::VMOVNTPDZ128mr :
551  HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
552  else
553  Opc = HasVLX ? X86::VMOVAPDZ128mr :
554  HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
555  } else
556  Opc = HasVLX ? X86::VMOVUPDZ128mr :
557  HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
558  break;
559  case MVT::v4i32:
560  case MVT::v2i64:
561  case MVT::v8i16:
562  case MVT::v16i8:
563  if (Aligned) {
564  if (IsNonTemporal)
565  Opc = HasVLX ? X86::VMOVNTDQZ128mr :
566  HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
567  else
568  Opc = HasVLX ? X86::VMOVDQA64Z128mr :
569  HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
570  } else
571  Opc = HasVLX ? X86::VMOVDQU64Z128mr :
572  HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
573  break;
574  case MVT::v8f32:
575  assert(HasAVX);
576  if (Aligned) {
577  if (IsNonTemporal)
578  Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
579  else
580  Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
581  } else
582  Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
583  break;
584  case MVT::v4f64:
585  assert(HasAVX);
586  if (Aligned) {
587  if (IsNonTemporal)
588  Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
589  else
590  Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
591  } else
592  Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
593  break;
594  case MVT::v8i32:
595  case MVT::v4i64:
596  case MVT::v16i16:
597  case MVT::v32i8:
598  assert(HasAVX);
599  if (Aligned) {
600  if (IsNonTemporal)
601  Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
602  else
603  Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
604  } else
605  Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
606  break;
607  case MVT::v16f32:
608  assert(HasAVX512);
609  if (Aligned)
610  Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
611  else
612  Opc = X86::VMOVUPSZmr;
613  break;
614  case MVT::v8f64:
615  assert(HasAVX512);
616  if (Aligned) {
617  Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
618  } else
619  Opc = X86::VMOVUPDZmr;
620  break;
621  case MVT::v8i64:
622  case MVT::v16i32:
623  case MVT::v32i16:
624  case MVT::v64i8:
625  assert(HasAVX512);
626  // Note: There are a lot more choices based on type with AVX-512, but
627  // there's really no advantage when the store isn't masked.
628  if (Aligned)
629  Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
630  else
631  Opc = X86::VMOVDQU64Zmr;
632  break;
633  }
634 
635  const MCInstrDesc &Desc = TII.get(Opc);
636  // Some of the instructions in the previous switch use FR128 instead
637  // of FR32 for ValReg. Make sure the register we feed the instruction
638  // matches its register class constraints.
639  // Note: It is fine to do a copy from FR32 to FR128; these are the same
640  // registers behind the scenes, which is actually why this did not trigger
641  // any bugs before.
642  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
643  MachineInstrBuilder MIB =
644  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
645  addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
646  if (MMO)
647  MIB->addMemOperand(*FuncInfo.MF, MMO);
648 
649  return true;
650 }
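// Example of the store selection above: an aligned, non-temporal store of
// <4 x float> becomes MOVNTPSmr (VMOVNTPSmr with AVX, VMOVNTPSZ128mr with
// VLX), while an unaligned store of the same type uses MOVUPSmr, since the
// non-temporal form is only chosen on the aligned path here.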
651 
652 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
653  X86AddressMode &AM,
654  MachineMemOperand *MMO, bool Aligned) {
655  // Handle 'null' like i32/i64 0.
656  if (isa<ConstantPointerNull>(Val))
657  Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
658 
659  // If this is a store of a simple constant, fold the constant into the store.
660  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
661  unsigned Opc = 0;
662  bool Signed = true;
663  switch (VT.getSimpleVT().SimpleTy) {
664  default: break;
665  case MVT::i1:
666  Signed = false;
667  LLVM_FALLTHROUGH; // Handle as i8.
668  case MVT::i8: Opc = X86::MOV8mi; break;
669  case MVT::i16: Opc = X86::MOV16mi; break;
670  case MVT::i32: Opc = X86::MOV32mi; break;
671  case MVT::i64:
672  // Must be a 32-bit sign extended value.
673  if (isInt<32>(CI->getSExtValue()))
674  Opc = X86::MOV64mi32;
675  break;
676  }
677 
678  if (Opc) {
679  MachineInstrBuilder MIB =
680  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
681  addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
682  : CI->getZExtValue());
683  if (MMO)
684  MIB->addMemOperand(*FuncInfo.MF, MMO);
685  return true;
686  }
687  }
688 
689  unsigned ValReg = getRegForValue(Val);
690  if (ValReg == 0)
691  return false;
692 
693  bool ValKill = hasTrivialKill(Val);
694  return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
695 }
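// For instance, "store i32 42, i32* %p" (illustrative IR) is folded into a
// single MOV32mi with an immediate of 42, and storing a null pointer is
// treated as storing the pointer-sized integer 0, so it takes the same
// immediate path.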
696 
697 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
698 /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
699 /// ISD::SIGN_EXTEND).
700 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
701  unsigned Src, EVT SrcVT,
702  unsigned &ResultReg) {
703  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
704  Src, /*TODO: Kill=*/false);
705  if (RR == 0)
706  return false;
707 
708  ResultReg = RR;
709  return true;
710 }
711 
712 bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
713  // Handle constant address.
714  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
715  // Can't handle alternate code models yet.
716  if (TM.getCodeModel() != CodeModel::Small)
717  return false;
718 
719  // Can't handle TLS yet.
720  if (GV->isThreadLocal())
721  return false;
722 
723  // Can't handle !absolute_symbol references yet.
724  if (GV->isAbsoluteSymbolRef())
725  return false;
726 
727  // RIP-relative addresses can't have additional register operands, so if
728  // we've already folded stuff into the addressing mode, just force the
729  // global value into its own register, which we can use as the basereg.
730  if (!Subtarget->isPICStyleRIPRel() ||
731  (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
732  // Okay, we've committed to selecting this global. Set up the address.
733  AM.GV = GV;
734 
735  // Allow the subtarget to classify the global.
736  unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
737 
738  // If this reference is relative to the pic base, set it now.
739  if (isGlobalRelativeToPICBase(GVFlags)) {
740  // FIXME: How do we know Base.Reg is free??
741  AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
742  }
743 
744  // Unless the ABI requires an extra load, return a direct reference to
745  // the global.
746  if (!isGlobalStubReference(GVFlags)) {
747  if (Subtarget->isPICStyleRIPRel()) {
748  // Use rip-relative addressing if we can. Above we verified that the
749  // base and index registers are unused.
750  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
751  AM.Base.Reg = X86::RIP;
752  }
753  AM.GVOpFlags = GVFlags;
754  return true;
755  }
756 
757  // Ok, we need to do a load from a stub. If we've already loaded from
758  // this stub, reuse the loaded pointer, otherwise emit the load now.
759  DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
760  unsigned LoadReg;
761  if (I != LocalValueMap.end() && I->second != 0) {
762  LoadReg = I->second;
763  } else {
764  // Issue load from stub.
765  unsigned Opc = 0;
766  const TargetRegisterClass *RC = nullptr;
767  X86AddressMode StubAM;
768  StubAM.Base.Reg = AM.Base.Reg;
769  StubAM.GV = GV;
770  StubAM.GVOpFlags = GVFlags;
771 
772  // Prepare for inserting code in the local-value area.
773  SavePoint SaveInsertPt = enterLocalValueArea();
774 
775  if (TLI.getPointerTy(DL) == MVT::i64) {
776  Opc = X86::MOV64rm;
777  RC = &X86::GR64RegClass;
778 
779  if (Subtarget->isPICStyleRIPRel())
780  StubAM.Base.Reg = X86::RIP;
781  } else {
782  Opc = X86::MOV32rm;
783  RC = &X86::GR32RegClass;
784  }
785 
786  LoadReg = createResultReg(RC);
787  MachineInstrBuilder LoadMI =
788  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
789  addFullAddress(LoadMI, StubAM);
790 
791  // Ok, back to normal mode.
792  leaveLocalValueArea(SaveInsertPt);
793 
794  // Prevent loading GV stub multiple times in same MBB.
795  LocalValueMap[V] = LoadReg;
796  }
797 
798  // Now construct the final address. Note that the Disp, Scale,
799  // and Index values may already be set here.
800  AM.Base.Reg = LoadReg;
801  AM.GV = nullptr;
802  return true;
803  }
804  }
805 
806  // If all else fails, try to materialize the value in a register.
807  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
808  if (AM.Base.Reg == 0) {
809  AM.Base.Reg = getRegForValue(V);
810  return AM.Base.Reg != 0;
811  }
812  if (AM.IndexReg == 0) {
813  assert(AM.Scale == 1 && "Scale with no index!");
814  AM.IndexReg = getRegForValue(V);
815  return AM.IndexReg != 0;
816  }
817  }
818 
819  return false;
820 }
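// Sketch of the stub path above: under a PIC model where the global needs an
// indirection (isGlobalStubReference), the address of the global is first
// loaded with a MOV32rm/MOV64rm from its GOT/stub entry, the loaded register
// becomes the base of the final addressing mode, and the load is cached in
// LocalValueMap so later uses of the same global in this block reuse it.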
821 
822 /// X86SelectAddress - Attempt to fill in an address from the given value.
823 ///
824 bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
825  SmallVector<const Value *, 32> GEPs;
826 redo_gep:
827  const User *U = nullptr;
828  unsigned Opcode = Instruction::UserOp1;
829  if (const Instruction *I = dyn_cast<Instruction>(V)) {
830  // Don't walk into other basic blocks; it's possible we haven't
831  // visited them yet, so the instructions may not yet be assigned
832  // virtual registers.
833  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
834  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
835  Opcode = I->getOpcode();
836  U = I;
837  }
838  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
839  Opcode = C->getOpcode();
840  U = C;
841  }
842 
843  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
844  if (Ty->getAddressSpace() > 255)
845  // Fast instruction selection doesn't support the special
846  // address spaces.
847  return false;
848 
849  switch (Opcode) {
850  default: break;
851  case Instruction::BitCast:
852  // Look past bitcasts.
853  return X86SelectAddress(U->getOperand(0), AM);
854 
855  case Instruction::IntToPtr:
856  // Look past no-op inttoptrs.
857  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
858  TLI.getPointerTy(DL))
859  return X86SelectAddress(U->getOperand(0), AM);
860  break;
861 
862  case Instruction::PtrToInt:
863  // Look past no-op ptrtoints.
864  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
865  return X86SelectAddress(U->getOperand(0), AM);
866  break;
867 
868  case Instruction::Alloca: {
869  // Do static allocas.
870  const AllocaInst *A = cast<AllocaInst>(V);
871  DenseMap<const AllocaInst *, int>::iterator SI =
872  FuncInfo.StaticAllocaMap.find(A);
873  if (SI != FuncInfo.StaticAllocaMap.end()) {
874  AM.BaseType = X86AddressMode::FrameIndexBase;
875  AM.Base.FrameIndex = SI->second;
876  return true;
877  }
878  break;
879  }
880 
881  case Instruction::Add: {
882  // Adds of constants are common and easy enough.
883  if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
884  uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
885  // They have to fit in the 32-bit signed displacement field though.
886  if (isInt<32>(Disp)) {
887  AM.Disp = (uint32_t)Disp;
888  return X86SelectAddress(U->getOperand(0), AM);
889  }
890  }
891  break;
892  }
893 
894  case Instruction::GetElementPtr: {
895  X86AddressMode SavedAM = AM;
896 
897  // Pattern-match simple GEPs.
898  uint64_t Disp = (int32_t)AM.Disp;
899  unsigned IndexReg = AM.IndexReg;
900  unsigned Scale = AM.Scale;
901  gep_type_iterator GTI = gep_type_begin(U);
902  // Iterate through the indices, folding what we can. Constants can be
903  // folded, and one dynamic index can be handled, if the scale is supported.
904  for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
905  i != e; ++i, ++GTI) {
906  const Value *Op = *i;
907  if (StructType *STy = GTI.getStructTypeOrNull()) {
908  const StructLayout *SL = DL.getStructLayout(STy);
909  Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
910  continue;
911  }
912 
913  // An array/variable index is always of the form i*S where S is the
914  // constant scale size. See if we can push the scale into immediates.
915  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
916  for (;;) {
917  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
918  // Constant-offset addressing.
919  Disp += CI->getSExtValue() * S;
920  break;
921  }
922  if (canFoldAddIntoGEP(U, Op)) {
923  // A compatible add with a constant operand. Fold the constant.
924  ConstantInt *CI =
925  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
926  Disp += CI->getSExtValue() * S;
927  // Iterate on the other operand.
928  Op = cast<AddOperator>(Op)->getOperand(0);
929  continue;
930  }
931  if (IndexReg == 0 &&
932  (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
933  (S == 1 || S == 2 || S == 4 || S == 8)) {
934  // Scaled-index addressing.
935  Scale = S;
936  IndexReg = getRegForGEPIndex(Op).first;
937  if (IndexReg == 0)
938  return false;
939  break;
940  }
941  // Unsupported.
942  goto unsupported_gep;
943  }
944  }
945 
946  // Check for displacement overflow.
947  if (!isInt<32>(Disp))
948  break;
949 
950  AM.IndexReg = IndexReg;
951  AM.Scale = Scale;
952  AM.Disp = (uint32_t)Disp;
953  GEPs.push_back(V);
954 
955  if (const GetElementPtrInst *GEP =
956  dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
957  // Ok, the GEP indices were covered by constant-offset and scaled-index
958  // addressing. Update the address state and move on to examining the base.
959  V = GEP;
960  goto redo_gep;
961  } else if (X86SelectAddress(U->getOperand(0), AM)) {
962  return true;
963  }
964 
965  // If we couldn't merge the gep value into this addr mode, revert back to
966  // our address and just match the value instead of completely failing.
967  AM = SavedAM;
968 
969  for (const Value *I : reverse(GEPs))
970  if (handleConstantAddresses(I, AM))
971  return true;
972 
973  return false;
974  unsupported_gep:
975  // Ok, the GEP indices weren't all covered.
976  break;
977  }
978  }
979 
980  return handleConstantAddresses(V, AM);
981 }
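// GEP folding example for the loop above (illustrative IR):
//   %p = getelementptr inbounds [16 x i32], [16 x i32]* %a, i64 0, i64 %i
// folds the dynamic index %i into IndexReg with Scale = 4 (the i32 element
// size), while a constant index such as "i64 3" would instead be folded into
// the 32-bit displacement as 3 * 4 = 12.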
982 
983 /// X86SelectCallAddress - Attempt to fill in an address from the given value.
984 ///
985 bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
986  const User *U = nullptr;
987  unsigned Opcode = Instruction::UserOp1;
988  const Instruction *I = dyn_cast<Instruction>(V);
989  // Record if the value is defined in the same basic block.
990  //
991  // This information is crucial to know whether or not folding an
992  // operand is valid.
993  // Indeed, FastISel generates or reuses a virtual register for all
994  // operands of all instructions it selects. Obviously, the definition and
995  // its uses must use the same virtual register otherwise the produced
996  // code is incorrect.
997  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
998  // registers for values that are alive across basic blocks. This ensures
999  // that the values are set consistently across basic blocks, even
1000  // if different instruction selection mechanisms are used (e.g., a mix of
1001  // SDISel and FastISel).
1002  // For values local to a basic block, the instruction selection process
1003  // generates these virtual registers with whatever method is appropriate
1004  // for its needs. In particular, FastISel and SDISel do not share the way
1005  // local virtual registers are set.
1006  // Therefore, it is impossible (or at least unsafe) to share values
1007  // between basic blocks unless they use the same instruction selection
1008  // method, which is not guaranteed for X86.
1009  // Moreover, things like hasOneUse could not be used accurately if we
1010  // allowed referencing values across basic blocks when they are not
1011  // initially alive across basic blocks.
1012  bool InMBB = true;
1013  if (I) {
1014  Opcode = I->getOpcode();
1015  U = I;
1016  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
1017  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
1018  Opcode = C->getOpcode();
1019  U = C;
1020  }
1021 
1022  switch (Opcode) {
1023  default: break;
1024  case Instruction::BitCast:
1025  // Look past bitcasts if its operand is in the same BB.
1026  if (InMBB)
1027  return X86SelectCallAddress(U->getOperand(0), AM);
1028  break;
1029 
1030  case Instruction::IntToPtr:
1031  // Look past no-op inttoptrs if its operand is in the same BB.
1032  if (InMBB &&
1033  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
1034  TLI.getPointerTy(DL))
1035  return X86SelectCallAddress(U->getOperand(0), AM);
1036  break;
1037 
1038  case Instruction::PtrToInt:
1039  // Look past no-op ptrtoints if its operand is in the same BB.
1040  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
1041  return X86SelectCallAddress(U->getOperand(0), AM);
1042  break;
1043  }
1044 
1045  // Handle constant address.
1046  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
1047  // Can't handle alternate code models yet.
1048  if (TM.getCodeModel() != CodeModel::Small)
1049  return false;
1050 
1051  // RIP-relative addresses can't have additional register operands.
1052  if (Subtarget->isPICStyleRIPRel() &&
1053  (AM.Base.Reg != 0 || AM.IndexReg != 0))
1054  return false;
1055 
1056  // Can't handle TLS.
1057  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
1058  if (GVar->isThreadLocal())
1059  return false;
1060 
1061  // Okay, we've committed to selecting this global. Set up the basic address.
1062  AM.GV = GV;
1063 
1064  // Return a direct reference to the global. FastISel can handle calls to
1065  // functions that require loads, such as dllimport and nonlazybind
1066  // functions.
1067  if (Subtarget->isPICStyleRIPRel()) {
1068  // Use rip-relative addressing if we can. Above we verified that the
1069  // base and index registers are unused.
1070  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
1071  AM.Base.Reg = X86::RIP;
1072  } else {
1073  AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
1074  }
1075 
1076  return true;
1077  }
1078 
1079  // If all else fails, try to materialize the value in a register.
1080  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
1081  if (AM.Base.Reg == 0) {
1082  AM.Base.Reg = getRegForValue(V);
1083  return AM.Base.Reg != 0;
1084  }
1085  if (AM.IndexReg == 0) {
1086  assert(AM.Scale == 1 && "Scale with no index!");
1087  AM.IndexReg = getRegForValue(V);
1088  return AM.IndexReg != 0;
1089  }
1090  }
1091 
1092  return false;
1093 }
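// In the common case of a direct call to a known function under the small
// code model, the code above simply records the GlobalValue (with RIP as the
// base register when RIP-relative addressing is in use), so the call target
// can be emitted without materializing the address in a register first.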
1094 
1095 
1096 /// X86SelectStore - Select and emit code to implement store instructions.
1097 bool X86FastISel::X86SelectStore(const Instruction *I) {
1098  // Atomic stores need special handling.
1099  const StoreInst *S = cast<StoreInst>(I);
1100 
1101  if (S->isAtomic())
1102  return false;
1103 
1104  const Value *PtrV = I->getOperand(1);
1105  if (TLI.supportSwiftError()) {
1106  // Swifterror values can come from either a function parameter with
1107  // swifterror attribute or an alloca with swifterror attribute.
1108  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
1109  if (Arg->hasSwiftErrorAttr())
1110  return false;
1111  }
1112 
1113  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
1114  if (Alloca->isSwiftError())
1115  return false;
1116  }
1117  }
1118 
1119  const Value *Val = S->getValueOperand();
1120  const Value *Ptr = S->getPointerOperand();
1121 
1122  MVT VT;
1123  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
1124  return false;
1125 
1126  unsigned Alignment = S->getAlignment();
1127  unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
1128  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1129  Alignment = ABIAlignment;
1130  bool Aligned = Alignment >= ABIAlignment;
1131 
1132  X86AddressMode AM;
1133  if (!X86SelectAddress(Ptr, AM))
1134  return false;
1135 
1136  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
1137 }
1138 
1139 /// X86SelectRet - Select and emit code to implement ret instructions.
1140 bool X86FastISel::X86SelectRet(const Instruction *I) {
1141  const ReturnInst *Ret = cast<ReturnInst>(I);
1142  const Function &F = *I->getParent()->getParent();
1143  const X86MachineFunctionInfo *X86MFInfo =
1144  FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
1145 
1146  if (!FuncInfo.CanLowerReturn)
1147  return false;
1148 
1149  if (TLI.supportSwiftError() &&
1150  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
1151  return false;
1152 
1153  if (TLI.supportSplitCSR(FuncInfo.MF))
1154  return false;
1155 
1156  CallingConv::ID CC = F.getCallingConv();
1157  if (CC != CallingConv::C &&
1158  CC != CallingConv::Fast &&
1159  CC != CallingConv::X86_FastCall &&
1160  CC != CallingConv::X86_StdCall &&
1161  CC != CallingConv::X86_ThisCall &&
1162  CC != CallingConv::X86_64_SysV &&
1163  CC != CallingConv::Win64)
1164  return false;
1165 
1166  // Don't handle popping bytes if they don't fit the ret's immediate.
1167  if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
1168  return false;
1169 
1170  // fastcc with -tailcallopt is intended to provide a guaranteed
1171  // tail call optimization. FastISel doesn't know how to do that.
1172  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
1173  return false;
1174 
1175  // Let SDISel handle vararg functions.
1176  if (F.isVarArg())
1177  return false;
1178 
1179  // Build a list of return value registers.
1180  SmallVector<unsigned, 4> RetRegs;
1181 
1182  if (Ret->getNumOperands() > 0) {
1183  SmallVector<ISD::OutputArg, 4> Outs;
1184  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1185 
1186  // Analyze operands of the call, assigning locations to each operand.
1187  SmallVector<CCValAssign, 16> ValLocs;
1188  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
1189  CCInfo.AnalyzeReturn(Outs, RetCC_X86);
1190 
1191  const Value *RV = Ret->getOperand(0);
1192  unsigned Reg = getRegForValue(RV);
1193  if (Reg == 0)
1194  return false;
1195 
1196  // Only handle a single return value for now.
1197  if (ValLocs.size() != 1)
1198  return false;
1199 
1200  CCValAssign &VA = ValLocs[0];
1201 
1202  // Don't bother handling odd stuff for now.
1203  if (VA.getLocInfo() != CCValAssign::Full)
1204  return false;
1205  // Only handle register returns for now.
1206  if (!VA.isRegLoc())
1207  return false;
1208 
1209  // The calling-convention tables for x87 returns don't tell
1210  // the whole story.
1211  if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
1212  return false;
1213 
1214  unsigned SrcReg = Reg + VA.getValNo();
1215  EVT SrcVT = TLI.getValueType(DL, RV->getType());
1216  EVT DstVT = VA.getValVT();
1217  // Special handling for extended integers.
1218  if (SrcVT != DstVT) {
1219  if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
1220  return false;
1221 
1222  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
1223  return false;
1224 
1225  assert(DstVT == MVT::i32 && "X86 should always ext to i32");
1226 
1227  if (SrcVT == MVT::i1) {
1228  if (Outs[0].Flags.isSExt())
1229  return false;
1230  SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
1231  SrcVT = MVT::i8;
1232  }
1233  unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
1234  ISD::SIGN_EXTEND;
1235  SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
1236  SrcReg, /*TODO: Kill=*/false);
1237  }
1238 
1239  // Make the copy.
1240  unsigned DstReg = VA.getLocReg();
1241  const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
1242  // Avoid a cross-class copy. This is very unlikely.
1243  if (!SrcRC->contains(DstReg))
1244  return false;
1245  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1246  TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
1247 
1248  // Add register to return instruction.
1249  RetRegs.push_back(VA.getLocReg());
1250  }
1251 
1252  // Swift calling convention does not require we copy the sret argument
1253  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
1254 
1255  // All x86 ABIs require that for returning structs by value we copy
1256  // the sret argument into %rax/%eax (depending on ABI) for the return.
1257  // We saved the argument into a virtual register in the entry block,
1258  // so now we copy the value out and into %rax/%eax.
1259  if (F.hasStructRetAttr() && CC != CallingConv::Swift) {
1260  unsigned Reg = X86MFInfo->getSRetReturnReg();
1261  assert(Reg &&
1262  "SRetReturnReg should have been set in LowerFormalArguments()!");
1263  unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
1264  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1265  TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
1266  RetRegs.push_back(RetReg);
1267  }
1268 
1269  // Now emit the RET.
1270  MachineInstrBuilder MIB;
1271  if (X86MFInfo->getBytesToPopOnReturn()) {
1272  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1273  TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL))
1274  .addImm(X86MFInfo->getBytesToPopOnReturn());
1275  } else {
1276  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1277  TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
1278  }
1279  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1280  MIB.addReg(RetRegs[i], RegState::Implicit);
1281  return true;
1282 }
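// For example, a 32-bit X86_StdCall callee that must pop 12 bytes of argument
// space returns with "RETIL 12" here, while a plain C-convention function
// emits RETL/RETQ, with the return-value registers attached as implicit uses
// so later passes know they are live out.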
1283 
1284 /// X86SelectLoad - Select and emit code to implement load instructions.
1285 ///
1286 bool X86FastISel::X86SelectLoad(const Instruction *I) {
1287  const LoadInst *LI = cast<LoadInst>(I);
1288 
1289  // Atomic loads need special handling.
1290  if (LI->isAtomic())
1291  return false;
1292 
1293  const Value *SV = I->getOperand(0);
1294  if (TLI.supportSwiftError()) {
1295  // Swifterror values can come from either a function parameter with
1296  // swifterror attribute or an alloca with swifterror attribute.
1297  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1298  if (Arg->hasSwiftErrorAttr())
1299  return false;
1300  }
1301 
1302  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1303  if (Alloca->isSwiftError())
1304  return false;
1305  }
1306  }
1307 
1308  MVT VT;
1309  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
1310  return false;
1311 
1312  const Value *Ptr = LI->getPointerOperand();
1313 
1314  X86AddressMode AM;
1315  if (!X86SelectAddress(Ptr, AM))
1316  return false;
1317 
1318  unsigned Alignment = LI->getAlignment();
1319  unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
1320  if (Alignment == 0) // Ensure that codegen never sees alignment 0
1321  Alignment = ABIAlignment;
1322 
1323  unsigned ResultReg = 0;
1324  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
1325  Alignment))
1326  return false;
1327 
1328  updateValueMap(I, ResultReg);
1329  return true;
1330 }
1331 
1332 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
1333  bool HasAVX512 = Subtarget->hasAVX512();
1334  bool HasAVX = Subtarget->hasAVX();
1335  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
1336  bool X86ScalarSSEf64 = Subtarget->hasSSE2();
1337 
1338  switch (VT.getSimpleVT().SimpleTy) {
1339  default: return 0;
1340  case MVT::i8: return X86::CMP8rr;
1341  case MVT::i16: return X86::CMP16rr;
1342  case MVT::i32: return X86::CMP32rr;
1343  case MVT::i64: return X86::CMP64rr;
1344  case MVT::f32:
1345  return X86ScalarSSEf32
1346  ? (HasAVX512 ? X86::VUCOMISSZrr
1347  : HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr)
1348  : 0;
1349  case MVT::f64:
1350  return X86ScalarSSEf64
1351  ? (HasAVX512 ? X86::VUCOMISDZrr
1352  : HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr)
1353  : 0;
1354  }
1355 }
1356 
1357 /// If we have a comparison against a constant RHS (RHSC), return an opcode
1358 /// that works for the compare (e.g. CMP32ri); otherwise return 0.
1359 static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
1360  int64_t Val = RHSC->getSExtValue();
1361  switch (VT.getSimpleVT().SimpleTy) {
1362  // Otherwise, we can't fold the immediate into this comparison.
1363  default:
1364  return 0;
1365  case MVT::i8:
1366  return X86::CMP8ri;
1367  case MVT::i16:
1368  if (isInt<8>(Val))
1369  return X86::CMP16ri8;
1370  return X86::CMP16ri;
1371  case MVT::i32:
1372  if (isInt<8>(Val))
1373  return X86::CMP32ri8;
1374  return X86::CMP32ri;
1375  case MVT::i64:
1376  if (isInt<8>(Val))
1377  return X86::CMP64ri8;
1378  // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
1379  // field.
1380  if (isInt<32>(Val))
1381  return X86::CMP64ri32;
1382  return 0;
1383  }
1384 }
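// For example, "icmp ult i32 %x, 5" (illustrative) can use CMP32ri8 because 5
// fits in a sign-extended 8-bit immediate, whereas comparing an i64 against a
// constant that needs more than 32 bits cannot be folded: this returns 0 and
// the caller falls back to the register-register compare.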
1385 
1386 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
1387  const DebugLoc &CurDbgLoc) {
1388  unsigned Op0Reg = getRegForValue(Op0);
1389  if (Op0Reg == 0) return false;
1390 
1391  // Handle 'null' like i32/i64 0.
1392  if (isa<ConstantPointerNull>(Op1))
1393  Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
1394 
1395  // We have two options: compare with register or immediate. If the RHS of
1396  // the compare is an immediate that we can fold into this compare, use
1397  // CMPri, otherwise use CMPrr.
1398  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1399  if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
1400  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
1401  .addReg(Op0Reg)
1402  .addImm(Op1C->getSExtValue());
1403  return true;
1404  }
1405  }
1406 
1407  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
1408  if (CompareOpc == 0) return false;
1409 
1410  unsigned Op1Reg = getRegForValue(Op1);
1411  if (Op1Reg == 0) return false;
1412  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
1413  .addReg(Op0Reg)
1414  .addReg(Op1Reg);
1415 
1416  return true;
1417 }
1418 
1419 bool X86FastISel::X86SelectCmp(const Instruction *I) {
1420  const CmpInst *CI = cast<CmpInst>(I);
1421 
1422  MVT VT;
1423  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
1424  return false;
1425 
1426  // Try to optimize or fold the cmp.
1427  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1428  unsigned ResultReg = 0;
1429  switch (Predicate) {
1430  default: break;
1431  case CmpInst::FCMP_FALSE: {
1432  ResultReg = createResultReg(&X86::GR32RegClass);
1433  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
1434  ResultReg);
1435  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
1436  X86::sub_8bit);
1437  if (!ResultReg)
1438  return false;
1439  break;
1440  }
1441  case CmpInst::FCMP_TRUE: {
1442  ResultReg = createResultReg(&X86::GR8RegClass);
1443  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
1444  ResultReg).addImm(1);
1445  break;
1446  }
1447  }
1448 
1449  if (ResultReg) {
1450  updateValueMap(I, ResultReg);
1451  return true;
1452  }
1453 
1454  const Value *LHS = CI->getOperand(0);
1455  const Value *RHS = CI->getOperand(1);
1456 
1457  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1458  // We don't have to materialize a zero constant for this case and can just use
1459  // %x again on the RHS.
1460  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1461  const auto *RHSC = dyn_cast<ConstantFP>(RHS);
1462  if (RHSC && RHSC->isNullValue())
1463  RHS = LHS;
1464  }
1465 
1466  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
1467  static const uint16_t SETFOpcTable[2][3] = {
1468  { X86::COND_E, X86::COND_NP, X86::AND8rr },
1469  { X86::COND_NE, X86::COND_P, X86::OR8rr }
1470  };
1471  const uint16_t *SETFOpc = nullptr;
1472  switch (Predicate) {
1473  default: break;
1474  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
1475  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
1476  }
1477 
1478  ResultReg = createResultReg(&X86::GR8RegClass);
1479  if (SETFOpc) {
1480  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1481  return false;
1482 
1483  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
1484  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
1485  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1486  FlagReg1).addImm(SETFOpc[0]);
1487  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1488  FlagReg2).addImm(SETFOpc[1]);
1489  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
1490  ResultReg).addReg(FlagReg1).addReg(FlagReg2);
1491  updateValueMap(I, ResultReg);
1492  return true;
1493  }
1494 
1495  X86::CondCode CC;
1496  bool SwapArgs;
1497  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1498  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1499 
1500  if (SwapArgs)
1501  std::swap(LHS, RHS);
1502 
1503  // Emit a compare of LHS/RHS.
1504  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1505  return false;
1506 
1507  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1508  ResultReg).addImm(CC);
1509  updateValueMap(I, ResultReg);
1510  return true;
1511 }
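// Example of the FCMP_OEQ path above: "fcmp oeq double %a, %b" emits a
// UCOMISD (or VUCOMISD with AVX), then SETE and SETNP into two temporary GR8
// registers, combined with AND8rr, because "ordered and equal" requires both
// ZF = 1 and PF = 0.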
1512 
1513 bool X86FastISel::X86SelectZExt(const Instruction *I) {
1514  EVT DstVT = TLI.getValueType(DL, I->getType());
1515  if (!TLI.isTypeLegal(DstVT))
1516  return false;
1517 
1518  unsigned ResultReg = getRegForValue(I->getOperand(0));
1519  if (ResultReg == 0)
1520  return false;
1521 
1522  // Handle zero-extension from i1 to i8, which is common.
1523  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1524  if (SrcVT == MVT::i1) {
1525  // Set the high bits to zero.
1526  ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
1527  SrcVT = MVT::i8;
1528 
1529  if (ResultReg == 0)
1530  return false;
1531  }
1532 
1533  if (DstVT == MVT::i64) {
1534  // Handle extension to 64-bits via sub-register shenanigans.
1535  unsigned MovInst;
1536 
1537  switch (SrcVT.SimpleTy) {
1538  case MVT::i8: MovInst = X86::MOVZX32rr8; break;
1539  case MVT::i16: MovInst = X86::MOVZX32rr16; break;
1540  case MVT::i32: MovInst = X86::MOV32rr; break;
1541  default: llvm_unreachable("Unexpected zext to i64 source type");
1542  }
1543 
1544  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1545  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
1546  .addReg(ResultReg);
1547 
1548  ResultReg = createResultReg(&X86::GR64RegClass);
1549  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
1550  ResultReg)
1551  .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
1552  } else if (DstVT == MVT::i16) {
1553  // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
1554  // extend to 32-bits and then extract down to 16-bits.
1555  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1556  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
1557  Result32).addReg(ResultReg);
1558 
1559  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1560  X86::sub_16bit);
1561  } else if (DstVT != MVT::i8) {
1562  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1563  ResultReg, /*Kill=*/true);
1564  if (ResultReg == 0)
1565  return false;
1566  }
1567 
1568  updateValueMap(I, ResultReg);
1569  return true;
1570 }
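// For instance, "zext i8 %x to i64" is lowered above as a MOVZX32rr8 into a
// 32-bit temporary followed by SUBREG_TO_REG, relying on the fact that a
// write to a 32-bit register implicitly zeroes the upper 32 bits on x86-64.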
1571 
1572 bool X86FastISel::X86SelectSExt(const Instruction *I) {
1573  EVT DstVT = TLI.getValueType(DL, I->getType());
1574  if (!TLI.isTypeLegal(DstVT))
1575  return false;
1576 
1577  unsigned ResultReg = getRegForValue(I->getOperand(0));
1578  if (ResultReg == 0)
1579  return false;
1580 
1581  // Handle sign-extension from i1 to i8.
1582  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1583  if (SrcVT == MVT::i1) {
1584  // Set the high bits to zero.
1585  unsigned ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg,
1586  /*TODO: Kill=*/false);
1587  if (ZExtReg == 0)
1588  return false;
1589 
1590  // Negate the result to make an 8-bit sign extended value.
1591  ResultReg = createResultReg(&X86::GR8RegClass);
1592  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r),
1593  ResultReg).addReg(ZExtReg);
1594 
1595  SrcVT = MVT::i8;
1596  }
1597 
1598  if (DstVT == MVT::i16) {
1599  // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
1600  // extend to 32-bits and then extract down to 16-bits.
1601  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1602  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
1603  Result32).addReg(ResultReg);
1604 
1605  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1606  X86::sub_16bit);
1607  } else if (DstVT != MVT::i8) {
1608  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
1609  ResultReg, /*Kill=*/true);
1610  if (ResultReg == 0)
1611  return false;
1612  }
1613 
1614  updateValueMap(I, ResultReg);
1615  return true;
1616 }
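// Example for the i1 path above: "sext i1 %b to i32" first zero-extends %b to
// an 8-bit 0/1 value, negates it with NEG8r to get 0x00 or 0xFF, and then
// sign-extends that to 32 bits, yielding 0 or -1.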
1617 
1618 bool X86FastISel::X86SelectBranch(const Instruction *I) {
1619  // Unconditional branches are selected by tablegen-generated code.
1620  // Handle a conditional branch.
1621  const BranchInst *BI = cast<BranchInst>(I);
1622  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1623  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1624 
1625  // Fold the common case of a conditional branch with a comparison
1626  // in the same block (values defined on other blocks may not have
1627  // initialized registers).
1628  X86::CondCode CC;
1629  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1630  if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
1631  EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1632 
1633  // Try to optimize or fold the cmp.
1634  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1635  switch (Predicate) {
1636  default: break;
1637  case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
1638  case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true;
1639  }
1640 
1641  const Value *CmpLHS = CI->getOperand(0);
1642  const Value *CmpRHS = CI->getOperand(1);
1643 
1644  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
1645  // 0.0.
1646  // We don't have to materialize a zero constant for this case and can just
1647  // use %x again on the RHS.
1648  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1649  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1650  if (CmpRHSC && CmpRHSC->isNullValue())
1651  CmpRHS = CmpLHS;
1652  }
1653 
1654  // Try to take advantage of fallthrough opportunities.
1655  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1656  std::swap(TrueMBB, FalseMBB);
1657  Predicate = CmpInst::getInversePredicate(Predicate);
1658  }
1659 
1660  // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
1661  // code check. Instead two branch instructions are required to check all
1662  // the flags. First we change the predicate to a supported condition code,
1663  // which will be the first branch. Later on we will emit the second
1664  // branch.
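 // Illustrative sketch of the resulting sequence for "fcmp une" (block labels
 // are hypothetical):
 //   ucomiss %xmm1, %xmm0
 //   jne     .LBB_true      ; COND_NE from FCMP_ONE, the first branch
 //   jp      .LBB_true      ; extra COND_P branch for the unordered case
 // For "fcmp oeq" the successors are swapped first so the same check applies.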
1665  bool NeedExtraBranch = false;
1666  switch (Predicate) {
1667  default: break;
1668  case CmpInst::FCMP_OEQ:
1669  std::swap(TrueMBB, FalseMBB);
1670  LLVM_FALLTHROUGH;
1671  case CmpInst::FCMP_UNE:
1672  NeedExtraBranch = true;
1673  Predicate = CmpInst::FCMP_ONE;
1674  break;
1675  }
1676 
1677  bool SwapArgs;
1678  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1679  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1680 
1681  if (SwapArgs)
1682  std::swap(CmpLHS, CmpRHS);
1683 
1684  // Emit a compare of the LHS and RHS, setting the flags.
1685  if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
1686  return false;
1687 
1688  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1689  .addMBB(TrueMBB).addImm(CC);
1690 
1691  // X86 requires a second branch to handle UNE (and OEQ, which is mapped
1692  // to UNE above).
1693  if (NeedExtraBranch) {
1694  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1695  .addMBB(TrueMBB).addImm(X86::COND_P);
1696  }
1697 
1698  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1699  return true;
1700  }
1701  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1702  // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1703  // typically happen for _Bool and C++ bools.
1704  MVT SourceVT;
1705  if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1706  isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1707  unsigned TestOpc = 0;
1708  switch (SourceVT.SimpleTy) {
1709  default: break;
1710  case MVT::i8: TestOpc = X86::TEST8ri; break;
1711  case MVT::i16: TestOpc = X86::TEST16ri; break;
1712  case MVT::i32: TestOpc = X86::TEST32ri; break;
1713  case MVT::i64: TestOpc = X86::TEST64ri32; break;
1714  }
1715  if (TestOpc) {
1716  unsigned OpReg = getRegForValue(TI->getOperand(0));
1717  if (OpReg == 0) return false;
1718 
1719  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
1720  .addReg(OpReg).addImm(1);
1721 
1722  unsigned JmpCond = X86::COND_NE;
1723  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1724  std::swap(TrueMBB, FalseMBB);
1725  JmpCond = X86::COND_E;
1726  }
1727 
1728  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1729  .addMBB(TrueMBB).addImm(JmpCond);
1730 
1731  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1732  return true;
1733  }
1734  }
1735  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
1736  // Fake request the condition, otherwise the intrinsic might be completely
1737  // optimized away.
1738  unsigned TmpReg = getRegForValue(BI->getCondition());
1739  if (TmpReg == 0)
1740  return false;
1741 
1742  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1743  .addMBB(TrueMBB).addImm(CC);
1744  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1745  return true;
1746  }
1747 
1748  // Otherwise do a clumsy setcc and re-test it.
1749  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1750  // in an explicit cast, so make sure to handle that correctly.
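 // Illustrative fallback sequence (the condition register is hypothetical):
 //   testb $1, %cl          ; only the low bit of the i8 condition is defined
 //   jne   .LBB_true        ; COND_NE
 // followed by the fall-through or explicit jump to the false block.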
1751  unsigned OpReg = getRegForValue(BI->getCondition());
1752  if (OpReg == 0) return false;
1753 
1754  // In case OpReg is a K register, COPY to a GPR
1755  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1756  unsigned KOpReg = OpReg;
1757  OpReg = createResultReg(&X86::GR32RegClass);
1758  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1759  TII.get(TargetOpcode::COPY), OpReg)
1760  .addReg(KOpReg);
1761  OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
1762  X86::sub_8bit);
1763  }
1764  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1765  .addReg(OpReg)
1766  .addImm(1);
1767  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1768  .addMBB(TrueMBB).addImm(X86::COND_NE);
1769  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1770  return true;
1771 }
1772 
1773 bool X86FastISel::X86SelectShift(const Instruction *I) {
1774  unsigned CReg = 0, OpReg = 0;
1775  const TargetRegisterClass *RC = nullptr;
1776  if (I->getType()->isIntegerTy(8)) {
1777  CReg = X86::CL;
1778  RC = &X86::GR8RegClass;
1779  switch (I->getOpcode()) {
1780  case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1781  case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1782  case Instruction::Shl: OpReg = X86::SHL8rCL; break;
1783  default: return false;
1784  }
1785  } else if (I->getType()->isIntegerTy(16)) {
1786  CReg = X86::CX;
1787  RC = &X86::GR16RegClass;
1788  switch (I->getOpcode()) {
1789  default: llvm_unreachable("Unexpected shift opcode");
1790  case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1791  case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1792  case Instruction::Shl: OpReg = X86::SHL16rCL; break;
1793  }
1794  } else if (I->getType()->isIntegerTy(32)) {
1795  CReg = X86::ECX;
1796  RC = &X86::GR32RegClass;
1797  switch (I->getOpcode()) {
1798  default: llvm_unreachable("Unexpected shift opcode");
1799  case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1800  case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1801  case Instruction::Shl: OpReg = X86::SHL32rCL; break;
1802  }
1803  } else if (I->getType()->isIntegerTy(64)) {
1804  CReg = X86::RCX;
1805  RC = &X86::GR64RegClass;
1806  switch (I->getOpcode()) {
1807  default: llvm_unreachable("Unexpected shift opcode");
1808  case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1809  case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1810  case Instruction::Shl: OpReg = X86::SHL64rCL; break;
1811  }
1812  } else {
1813  return false;
1814  }
1815 
1816  MVT VT;
1817  if (!isTypeLegal(I->getType(), VT))
1818  return false;
1819 
1820  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1821  if (Op0Reg == 0) return false;
1822 
1823  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1824  if (Op1Reg == 0) return false;
1825  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
1826  CReg).addReg(Op1Reg);
1827 
1828  // The shift instruction uses X86::CL. If we defined a super-register
1829  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
1830  if (CReg != X86::CL)
1831  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1832  TII.get(TargetOpcode::KILL), X86::CL)
1833  .addReg(CReg, RegState::Kill);
1834 
1835  unsigned ResultReg = createResultReg(RC);
1836  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
1837  .addReg(Op0Reg);
1838  updateValueMap(I, ResultReg);
1839  return true;
1840 }
1841 
1842 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1843  const static unsigned NumTypes = 4; // i8, i16, i32, i64
1844  const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1845  const static bool S = true; // IsSigned
1846  const static bool U = false; // !IsSigned
1847  const static unsigned Copy = TargetOpcode::COPY;
1848  // For the X86 DIV/IDIV instruction, in most cases the dividend
1849  // (numerator) must be in a specific register pair highreg:lowreg,
1850  // producing the quotient in lowreg and the remainder in highreg.
1851  // For most data types, to set up the instruction, the dividend is
1852  // copied into lowreg, and lowreg is sign-extended or zero-extended
1853  // into highreg. The exception is i8, where the dividend is defined
1854  // as a single register rather than a register pair, and we
1855  // therefore directly sign-extend or zero-extend the dividend into
1856  // lowreg, instead of copying, and ignore the highreg.
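 // As a rough example (operand names are placeholders), a 32-bit signed
 // division is set up as
 //   movl %op0, %eax        ; copy dividend into the low register
 //   cdq                    ; sign-extend EAX into EDX
 //   idivl %op1             ; quotient in EAX, remainder in EDX
 // whereas an i8 udiv zero-extends the dividend directly into AX (MOVZX16rr8)
 // and uses DIV8r, taking the quotient from AL and the remainder from AH.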
1857  const static struct DivRemEntry {
1858  // The following portion depends only on the data type.
1859  const TargetRegisterClass *RC;
1860  unsigned LowInReg; // low part of the register pair
1861  unsigned HighInReg; // high part of the register pair
1862  // The following portion depends on both the data type and the operation.
1863  struct DivRemResult {
1864  unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1865  unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1866  // highreg, or copying a zero into highreg.
1867  unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1868  // zero/sign-extending into lowreg for i8.
1869  unsigned DivRemResultReg; // Register containing the desired result.
1870  bool IsOpSigned; // Whether to use signed or unsigned form.
1871  } ResultTable[NumOps];
1872  } OpTable[NumTypes] = {
1873  { &X86::GR8RegClass, X86::AX, 0, {
1874  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
1875  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
1876  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
1877  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
1878  }
1879  }, // i8
1880  { &X86::GR16RegClass, X86::AX, X86::DX, {
1881  { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
1882  { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
1883  { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
1884  { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
1885  }
1886  }, // i16
1887  { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1888  { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
1889  { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
1890  { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
1891  { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
1892  }
1893  }, // i32
1894  { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1895  { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
1896  { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
1897  { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
1898  { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
1899  }
1900  }, // i64
1901  };
1902 
1903  MVT VT;
1904  if (!isTypeLegal(I->getType(), VT))
1905  return false;
1906 
1907  unsigned TypeIndex, OpIndex;
1908  switch (VT.SimpleTy) {
1909  default: return false;
1910  case MVT::i8: TypeIndex = 0; break;
1911  case MVT::i16: TypeIndex = 1; break;
1912  case MVT::i32: TypeIndex = 2; break;
1913  case MVT::i64: TypeIndex = 3;
1914  if (!Subtarget->is64Bit())
1915  return false;
1916  break;
1917  }
1918 
1919  switch (I->getOpcode()) {
1920  default: llvm_unreachable("Unexpected div/rem opcode");
1921  case Instruction::SDiv: OpIndex = 0; break;
1922  case Instruction::SRem: OpIndex = 1; break;
1923  case Instruction::UDiv: OpIndex = 2; break;
1924  case Instruction::URem: OpIndex = 3; break;
1925  }
1926 
1927  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1928  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1929  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1930  if (Op0Reg == 0)
1931  return false;
1932  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1933  if (Op1Reg == 0)
1934  return false;
1935 
1936  // Move op0 into low-order input register.
1937  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1938  TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1939  // Zero-extend or sign-extend into high-order input register.
1940  if (OpEntry.OpSignExtend) {
1941  if (OpEntry.IsOpSigned)
1942  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1943  TII.get(OpEntry.OpSignExtend));
1944  else {
1945  unsigned Zero32 = createResultReg(&X86::GR32RegClass);
1946  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1947  TII.get(X86::MOV32r0), Zero32);
1948 
1949  // Copy the zero into the appropriate sub/super/identical physical
1950  // register. Unfortunately the operations needed are not uniform enough
1951  // to fit neatly into the table above.
1952  if (VT == MVT::i16) {
1953  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1954  TII.get(Copy), TypeEntry.HighInReg)
1955  .addReg(Zero32, 0, X86::sub_16bit);
1956  } else if (VT == MVT::i32) {
1957  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1958  TII.get(Copy), TypeEntry.HighInReg)
1959  .addReg(Zero32);
1960  } else if (VT == MVT::i64) {
1961  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1962  TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1963  .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1964  }
1965  }
1966  }
1967  // Generate the DIV/IDIV instruction.
1968  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1969  TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1970  // For i8 remainder, we can't reference AH directly, as we'll end
1971  // up with bogus copies like %r9b = COPY %ah. Reference AX
1972  // instead to prevent AH references in a REX-prefixed instruction.
1973  //
1974  // The current assumption of the fast register allocator is that isel
1975  // won't generate explicit references to the GR8_NOREX registers. If
1976  // the allocator and/or the backend get enhanced to be more robust in
1977  // that regard, this can be, and should be, removed.
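 // Sketch of the workaround: rather than something like "%r9b = COPY %ah",
 // the remainder is recovered via the 16-bit super-register:
 //   copy AX into tmp16
 //   shrw $8, tmp16         ; remainder now in the low byte
 // and the i8 result is the sub_8bit subregister of tmp16 (tmp16 stands for
 // the virtual register created below).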
1978  unsigned ResultReg = 0;
1979  if ((I->getOpcode() == Instruction::SRem ||
1980  I->getOpcode() == Instruction::URem) &&
1981  OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
1982  unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
1983  unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
1984  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1985  TII.get(Copy), SourceSuperReg).addReg(X86::AX);
1986 
1987  // Shift AX right by 8 bits instead of using AH.
1988  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
1989  ResultSuperReg).addReg(SourceSuperReg).addImm(8);
1990 
1991  // Now reference the 8-bit subreg of the result.
1992  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
1993  /*Kill=*/true, X86::sub_8bit);
1994  }
1995  // Copy the result out of the physreg if we haven't already.
1996  if (!ResultReg) {
1997  ResultReg = createResultReg(TypeEntry.RC);
1998  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
1999  .addReg(OpEntry.DivRemResultReg);
2000  }
2001  updateValueMap(I, ResultReg);
2002 
2003  return true;
2004 }
2005 
2006 /// Emit a conditional move instruction (if they are supported) to lower
2007 /// the select.
2008 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
2009  // Check if the subtarget supports these instructions.
2010  if (!Subtarget->hasCMov())
2011  return false;
2012 
2013  // FIXME: Add support for i8.
2014  if (RetVT < MVT::i16 || RetVT > MVT::i64)
2015  return false;
2016 
2017  const Value *Cond = I->getOperand(0);
2018  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2019  bool NeedTest = true;
2020  X86::CondCode CC = X86::COND_NE;
2021 
2022  // Optimize conditions coming from a compare if both instructions are in the
2023  // same basic block (values defined in other basic blocks may not have
2024  // initialized registers).
2025  const auto *CI = dyn_cast<CmpInst>(Cond);
2026  if (CI && (CI->getParent() == I->getParent())) {
2027  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2028 
2029  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
2030  static const uint16_t SETFOpcTable[2][3] = {
2031  { X86::COND_NP, X86::COND_E, X86::TEST8rr },
2032  { X86::COND_P, X86::COND_NE, X86::OR8rr }
2033  };
2034  const uint16_t *SETFOpc = nullptr;
2035  switch (Predicate) {
2036  default: break;
2037  case CmpInst::FCMP_OEQ:
2038  SETFOpc = &SETFOpcTable[0][0];
2039  Predicate = CmpInst::ICMP_NE;
2040  break;
2041  case CmpInst::FCMP_UNE:
2042  SETFOpc = &SETFOpcTable[1][0];
2043  Predicate = CmpInst::ICMP_NE;
2044  break;
2045  }
2046 
2047  bool NeedSwap;
2048  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
2049  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
2050 
2051  const Value *CmpLHS = CI->getOperand(0);
2052  const Value *CmpRHS = CI->getOperand(1);
2053  if (NeedSwap)
2054  std::swap(CmpLHS, CmpRHS);
2055 
2056  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2057  // Emit a compare of the LHS and RHS, setting the flags.
2058  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2059  return false;
2060 
2061  if (SETFOpc) {
2062  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
2063  unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
2064  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2065  FlagReg1).addImm(SETFOpc[0]);
2066  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2067  FlagReg2).addImm(SETFOpc[1]);
2068  auto const &II = TII.get(SETFOpc[2]);
2069  if (II.getNumDefs()) {
2070  unsigned TmpReg = createResultReg(&X86::GR8RegClass);
2071  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
2072  .addReg(FlagReg2).addReg(FlagReg1);
2073  } else {
2074  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2075  .addReg(FlagReg2).addReg(FlagReg1);
2076  }
2077  }
2078  NeedTest = false;
2079  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
2080  // Fake request the condition, otherwise the intrinsic might be completely
2081  // optimized away.
2082  unsigned TmpReg = getRegForValue(Cond);
2083  if (TmpReg == 0)
2084  return false;
2085 
2086  NeedTest = false;
2087  }
2088 
2089  if (NeedTest) {
2090  // Selects operate on i1; however, CondReg is 8 bits wide and may contain
2091  // garbage. Only the least significant bit is guaranteed to be accurate,
2092  // so if we read more than the LSB we may see non-zero values where the
2093  // LSB is actually zero. Therefore, we have to truncate CondReg to i1 for
2094  // the select. This is achieved by performing a TEST against 1.
2095  unsigned CondReg = getRegForValue(Cond);
2096  if (CondReg == 0)
2097  return false;
2098  bool CondIsKill = hasTrivialKill(Cond);
2099 
2100  // In case CondReg is a K register, COPY to a GPR
2101  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2102  unsigned KCondReg = CondReg;
2103  CondReg = createResultReg(&X86::GR32RegClass);
2104  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2105  TII.get(TargetOpcode::COPY), CondReg)
2106  .addReg(KCondReg, getKillRegState(CondIsKill));
2107  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2108  X86::sub_8bit);
2109  }
2110  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2111  .addReg(CondReg, getKillRegState(CondIsKill))
2112  .addImm(1);
2113  }
2114 
2115  const Value *LHS = I->getOperand(1);
2116  const Value *RHS = I->getOperand(2);
2117 
2118  unsigned RHSReg = getRegForValue(RHS);
2119  bool RHSIsKill = hasTrivialKill(RHS);
2120 
2121  unsigned LHSReg = getRegForValue(LHS);
2122  bool LHSIsKill = hasTrivialKill(LHS);
2123 
2124  if (!LHSReg || !RHSReg)
2125  return false;
2126 
2127  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
2128  unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(*RC)/8);
2129  unsigned ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill,
2130  LHSReg, LHSIsKill, CC);
2131  updateValueMap(I, ResultReg);
2132  return true;
2133 }
2134 
2135 /// Emit SSE or AVX instructions to lower the select.
2136 ///
2137 /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
2138 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
2139 /// SSE instructions are available. If AVX is available, try to use a VBLENDV.
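/// For illustration, the plain SSE path computes a full-width compare mask and
/// blends with it:
///   mask   = CMPSS(CmpLHS, CmpRHS, cc)     ; all-ones or all-zeros
///   result = (mask & LHS) | (~mask & RHS)
/// which maps onto the CMPSS/ANDPS/ANDNPS/ORPS opcodes chosen below, while the
/// AVX path folds the blend into a single VBLENDVPS/VBLENDVPD.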
2140 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
2141  // Optimize conditions coming from a compare if both instructions are in the
2142  // same basic block (values defined in other basic blocks may not have
2143  // initialized registers).
2144  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
2145  if (!CI || (CI->getParent() != I->getParent()))
2146  return false;
2147 
2148  if (I->getType() != CI->getOperand(0)->getType() ||
2149  !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
2150  (Subtarget->hasSSE2() && RetVT == MVT::f64)))
2151  return false;
2152 
2153  const Value *CmpLHS = CI->getOperand(0);
2154  const Value *CmpRHS = CI->getOperand(1);
2155  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2156 
2157  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
2158  // We don't have to materialize a zero constant for this case and can just use
2159  // %x again on the RHS.
2160  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
2161  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
2162  if (CmpRHSC && CmpRHSC->isNullValue())
2163  CmpRHS = CmpLHS;
2164  }
2165 
2166  unsigned CC;
2167  bool NeedSwap;
2168  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
2169  if (CC > 7 && !Subtarget->hasAVX())
2170  return false;
2171 
2172  if (NeedSwap)
2173  std::swap(CmpLHS, CmpRHS);
2174 
2175  const Value *LHS = I->getOperand(1);
2176  const Value *RHS = I->getOperand(2);
2177 
2178  unsigned LHSReg = getRegForValue(LHS);
2179  bool LHSIsKill = hasTrivialKill(LHS);
2180 
2181  unsigned RHSReg = getRegForValue(RHS);
2182  bool RHSIsKill = hasTrivialKill(RHS);
2183 
2184  unsigned CmpLHSReg = getRegForValue(CmpLHS);
2185  bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
2186 
2187  unsigned CmpRHSReg = getRegForValue(CmpRHS);
2188  bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
2189 
2190  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
2191  return false;
2192 
2193  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2194  unsigned ResultReg;
2195 
2196  if (Subtarget->hasAVX512()) {
2197  // If we have AVX512 we can use a mask compare and masked movss/sd.
2198  const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
2199  const TargetRegisterClass *VK1 = &X86::VK1RegClass;
2200 
2201  unsigned CmpOpcode =
2202  (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
2203  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
2204  CmpRHSReg, CmpRHSIsKill, CC);
2205 
2206  // Need an IMPLICIT_DEF for the input that is used to generate the upper
2207  // bits of the result register since it's not based on any of the inputs.
2208  unsigned ImplicitDefReg = createResultReg(VR128X);
2209  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2210  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2211 
2212  // Place RHSReg in the passthru of the masked movss/sd operation and put
2213  // LHSReg in the input. The mask input comes from the compare.
2214  unsigned MovOpcode =
2215  (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
2216  unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
2217  CmpReg, true, ImplicitDefReg, true,
2218  LHSReg, LHSIsKill);
2219 
2220  ResultReg = createResultReg(RC);
2221  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2222  TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
2223 
2224  } else if (Subtarget->hasAVX()) {
2225  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2226 
2227  // If we have AVX, create 1 blendv instead of 3 logic instructions.
2228  // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
2229  // uses XMM0 as the selection register. That may need just as many
2230  // instructions as the AND/ANDN/OR sequence due to register moves, so
2231  // don't bother.
2232  unsigned CmpOpcode =
2233  (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
2234  unsigned BlendOpcode =
2235  (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
2236 
2237  unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
2238  CmpRHSReg, CmpRHSIsKill, CC);
2239  unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
2240  LHSReg, LHSIsKill, CmpReg, true);
2241  ResultReg = createResultReg(RC);
2242  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2243  TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
2244  } else {
2245  // Choose the SSE instruction sequence based on data type (float or double).
2246  static const uint16_t OpcTable[2][4] = {
2247  { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
2248  { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
2249  };
2250 
2251  const uint16_t *Opc = nullptr;
2252  switch (RetVT.SimpleTy) {
2253  default: return false;
2254  case MVT::f32: Opc = &OpcTable[0][0]; break;
2255  case MVT::f64: Opc = &OpcTable[1][0]; break;
2256  }
2257 
2258  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2259  unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
2260  CmpRHSReg, CmpRHSIsKill, CC);
2261  unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
2262  LHSReg, LHSIsKill);
2263  unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
2264  RHSReg, RHSIsKill);
2265  unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
2266  AndReg, /*IsKill=*/true);
2267  ResultReg = createResultReg(RC);
2268  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2269  TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
2270  }
2271  updateValueMap(I, ResultReg);
2272  return true;
2273 }
2274 
2275 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
2276  // These are pseudo CMOV instructions and will be later expanded into control-
2277  // flow.
2278  unsigned Opc;
2279  switch (RetVT.SimpleTy) {
2280  default: return false;
2281  case MVT::i8: Opc = X86::CMOV_GR8; break;
2282  case MVT::i16: Opc = X86::CMOV_GR16; break;
2283  case MVT::i32: Opc = X86::CMOV_GR32; break;
2284  case MVT::f32: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X
2285  : X86::CMOV_FR32; break;
2286  case MVT::f64: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X
2287  : X86::CMOV_FR64; break;
2288  }
2289 
2290  const Value *Cond = I->getOperand(0);
2291  X86::CondCode CC = X86::COND_NE;
2292 
2293  // Optimize conditions coming from a compare if both instructions are in the
2294  // same basic block (values defined in other basic blocks may not have
2295  // initialized registers).
2296  const auto *CI = dyn_cast<CmpInst>(Cond);
2297  if (CI && (CI->getParent() == I->getParent())) {
2298  bool NeedSwap;
2299  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
2300  if (CC > X86::LAST_VALID_COND)
2301  return false;
2302 
2303  const Value *CmpLHS = CI->getOperand(0);
2304  const Value *CmpRHS = CI->getOperand(1);
2305 
2306  if (NeedSwap)
2307  std::swap(CmpLHS, CmpRHS);
2308 
2309  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2310  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2311  return false;
2312  } else {
2313  unsigned CondReg = getRegForValue(Cond);
2314  if (CondReg == 0)
2315  return false;
2316  bool CondIsKill = hasTrivialKill(Cond);
2317 
2318  // In case CondReg is a K register, COPY to a GPR
2319  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2320  unsigned KCondReg = CondReg;
2321  CondReg = createResultReg(&X86::GR32RegClass);
2322  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2323  TII.get(TargetOpcode::COPY), CondReg)
2324  .addReg(KCondReg, getKillRegState(CondIsKill));
2325  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2326  X86::sub_8bit);
2327  }
2328  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2329  .addReg(CondReg, getKillRegState(CondIsKill))
2330  .addImm(1);
2331  }
2332 
2333  const Value *LHS = I->getOperand(1);
2334  const Value *RHS = I->getOperand(2);
2335 
2336  unsigned LHSReg = getRegForValue(LHS);
2337  bool LHSIsKill = hasTrivialKill(LHS);
2338 
2339  unsigned RHSReg = getRegForValue(RHS);
2340  bool RHSIsKill = hasTrivialKill(RHS);
2341 
2342  if (!LHSReg || !RHSReg)
2343  return false;
2344 
2345  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2346 
2347  unsigned ResultReg =
2348  fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
2349  updateValueMap(I, ResultReg);
2350  return true;
2351 }
2352 
2353 bool X86FastISel::X86SelectSelect(const Instruction *I) {
2354  MVT RetVT;
2355  if (!isTypeLegal(I->getType(), RetVT))
2356  return false;
2357 
2358  // Check if we can fold the select.
2359  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2360  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2361  const Value *Opnd = nullptr;
2362  switch (Predicate) {
2363  default: break;
2364  case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
2365  case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
2366  }
2367  // No need for a select anymore - this is an unconditional move.
2368  if (Opnd) {
2369  unsigned OpReg = getRegForValue(Opnd);
2370  if (OpReg == 0)
2371  return false;
2372  bool OpIsKill = hasTrivialKill(Opnd);
2373  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2374  unsigned ResultReg = createResultReg(RC);
2375  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2376  TII.get(TargetOpcode::COPY), ResultReg)
2377  .addReg(OpReg, getKillRegState(OpIsKill));
2378  updateValueMap(I, ResultReg);
2379  return true;
2380  }
2381  }
2382 
2383  // First try to use real conditional move instructions.
2384  if (X86FastEmitCMoveSelect(RetVT, I))
2385  return true;
2386 
2387  // Try to use a sequence of SSE instructions to simulate a conditional move.
2388  if (X86FastEmitSSESelect(RetVT, I))
2389  return true;
2390 
2391  // Fall-back to pseudo conditional move instructions, which will be later
2392  // converted to control-flow.
2393  if (X86FastEmitPseudoSelect(RetVT, I))
2394  return true;
2395 
2396  return false;
2397 }
2398 
2399 // Common code for X86SelectSIToFP and X86SelectUIToFP.
2400 bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
2401  // The target-independent selection algorithm in FastISel already knows how
2402  // to select a SINT_TO_FP if the target is SSE but not AVX.
2403  // Early exit if the subtarget doesn't have AVX.
2404  // Unsigned conversion requires AVX512.
2405  bool HasAVX512 = Subtarget->hasAVX512();
2406  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
2407  return false;
2408 
2409  // TODO: We could sign extend narrower types.
2410  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
2411  if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
2412  return false;
2413 
2414  // Select integer to float/double conversion.
2415  unsigned OpReg = getRegForValue(I->getOperand(0));
2416  if (OpReg == 0)
2417  return false;
2418 
2419  unsigned Opcode;
2420 
2421  static const uint16_t SCvtOpc[2][2][2] = {
2422  { { X86::VCVTSI2SSrr, X86::VCVTSI642SSrr },
2423  { X86::VCVTSI2SDrr, X86::VCVTSI642SDrr } },
2424  { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
2425  { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
2426  };
2427  static const uint16_t UCvtOpc[2][2] = {
2428  { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
2429  { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
2430  };
2431  bool Is64Bit = SrcVT == MVT::i64;
2432 
2433  if (I->getType()->isDoubleTy()) {
2434  // s/uitofp int -> double
2435  Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
2436  } else if (I->getType()->isFloatTy()) {
2437  // s/uitofp int -> float
2438  Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
2439  } else
2440  return false;
2441 
2442  MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
2443  const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
2444  unsigned ImplicitDefReg = createResultReg(RC);
2445  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2446  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2447  unsigned ResultReg =
2448  fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
2449  updateValueMap(I, ResultReg);
2450  return true;
2451 }
2452 
2453 bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
2454  return X86SelectIntToFP(I, /*IsSigned*/true);
2455 }
2456 
2457 bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
2458  return X86SelectIntToFP(I, /*IsSigned*/false);
2459 }
2460 
2461 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
2462 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
2463  unsigned TargetOpc,
2464  const TargetRegisterClass *RC) {
2465  assert((I->getOpcode() == Instruction::FPExt ||
2466  I->getOpcode() == Instruction::FPTrunc) &&
2467  "Instruction must be an FPExt or FPTrunc!");
2468  bool HasAVX = Subtarget->hasAVX();
2469 
2470  unsigned OpReg = getRegForValue(I->getOperand(0));
2471  if (OpReg == 0)
2472  return false;
2473 
2474  unsigned ImplicitDefReg;
2475  if (HasAVX) {
2476  ImplicitDefReg = createResultReg(RC);
2477  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2478  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2479 
2480  }
2481 
2482  unsigned ResultReg = createResultReg(RC);
2483  MachineInstrBuilder MIB;
2484  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
2485  ResultReg);
2486 
2487  if (HasAVX)
2488  MIB.addReg(ImplicitDefReg);
2489 
2490  MIB.addReg(OpReg);
2491  updateValueMap(I, ResultReg);
2492  return true;
2493 }
2494 
2495 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2496  if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
2497  I->getOperand(0)->getType()->isFloatTy()) {
2498  bool HasAVX512 = Subtarget->hasAVX512();
2499  // fpext from float to double.
2500  unsigned Opc =
2501  HasAVX512 ? X86::VCVTSS2SDZrr
2502  : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
2503  return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f64));
2504  }
2505 
2506  return false;
2507 }
2508 
2509 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2510  if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
2511  I->getOperand(0)->getType()->isDoubleTy()) {
2512  bool HasAVX512 = Subtarget->hasAVX512();
2513  // fptrunc from double to float.
2514  unsigned Opc =
2515  HasAVX512 ? X86::VCVTSD2SSZrr
2516  : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
2517  return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f32));
2518  }
2519 
2520  return false;
2521 }
2522 
2523 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2524  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
2525  EVT DstVT = TLI.getValueType(DL, I->getType());
2526 
2527  // This code only handles truncation to byte.
2528  if (DstVT != MVT::i8 && DstVT != MVT::i1)
2529  return false;
2530  if (!TLI.isTypeLegal(SrcVT))
2531  return false;
2532 
2533  unsigned InputReg = getRegForValue(I->getOperand(0));
2534  if (!InputReg)
2535  // Unhandled operand. Halt "fast" selection and bail.
2536  return false;
2537 
2538  if (SrcVT == MVT::i8) {
2539  // Truncate from i8 to i1; no code needed.
2540  updateValueMap(I, InputReg);
2541  return true;
2542  }
2543 
2544  // Issue an extract_subreg.
2545  unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
2546  InputReg, false,
2547  X86::sub_8bit);
2548  if (!ResultReg)
2549  return false;
2550 
2551  updateValueMap(I, ResultReg);
2552  return true;
2553 }
2554 
2555 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2556  return Len <= (Subtarget->is64Bit() ? 32 : 16);
2557 }
2558 
2559 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
2560  X86AddressMode SrcAM, uint64_t Len) {
2561 
2562  // Make sure we don't bloat code by inlining very large memcpy's.
2563  if (!IsMemcpySmall(Len))
2564  return false;
2565 
2566  bool i64Legal = Subtarget->is64Bit();
2567 
2568  // We don't care about alignment here since we just emit integer accesses.
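  // Example: on x86-64 a 13-byte copy is emitted as one i64, one i32 and one
  // i8 load/store pair (8 + 4 + 1 bytes), advancing both displacements after
  // each access.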
2569  while (Len) {
2570  MVT VT;
2571  if (Len >= 8 && i64Legal)
2572  VT = MVT::i64;
2573  else if (Len >= 4)
2574  VT = MVT::i32;
2575  else if (Len >= 2)
2576  VT = MVT::i16;
2577  else
2578  VT = MVT::i8;
2579 
2580  unsigned Reg;
2581  bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2582  RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
2583  assert(RV && "Failed to emit load or store??");
2584 
2585  unsigned Size = VT.getSizeInBits()/8;
2586  Len -= Size;
2587  DestAM.Disp += Size;
2588  SrcAM.Disp += Size;
2589  }
2590 
2591  return true;
2592 }
2593 
2594 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2595  // FIXME: Handle more intrinsics.
2596  switch (II->getIntrinsicID()) {
2597  default: return false;
2598  case Intrinsic::convert_from_fp16:
2599  case Intrinsic::convert_to_fp16: {
2600  if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
2601  return false;
2602 
2603  const Value *Op = II->getArgOperand(0);
2604  unsigned InputReg = getRegForValue(Op);
2605  if (InputReg == 0)
2606  return false;
2607 
2608  // F16C only allows converting from float to half and from half to float.
2609  bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
2610  if (IsFloatToHalf) {
2611  if (!Op->getType()->isFloatTy())
2612  return false;
2613  } else {
2614  if (!II->getType()->isFloatTy())
2615  return false;
2616  }
2617 
2618  unsigned ResultReg = 0;
2619  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
2620  if (IsFloatToHalf) {
2621  // 'InputReg' is implicitly promoted from register class FR32 to
2622  // register class VR128 by method 'constrainOperandRegClass' which is
2623  // directly called by 'fastEmitInst_ri'.
2624  // Instruction VCVTPS2PHrr takes an extra immediate operand which is
2625  // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
2626  // It's consistent with the other FP instructions, which are usually
2627  // controlled by MXCSR.
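  // Note (added for clarity): the immediate 4 passed below is 0b100, i.e.
  // bit 2 set, which selects "use MXCSR.RC" rather than an explicit rounding
  // mode in the immediate's low two bits.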
2628  InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4);
2629 
2630  // Move the lower 32-bits of ResultReg to another register of class GR32.
2631  ResultReg = createResultReg(&X86::GR32RegClass);
2632  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2633  TII.get(X86::VMOVPDI2DIrr), ResultReg)
2634  .addReg(InputReg, RegState::Kill);
2635 
2636  // The result value is in the lower 16-bits of ResultReg.
2637  unsigned RegIdx = X86::sub_16bit;
2638  ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
2639  } else {
2640  assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
2641  // Explicitly sign-extend the input to 32-bit.
2642  InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
2643  /*Kill=*/false);
2644 
2645  // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
2646  InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
2647  InputReg, /*Kill=*/true);
2648 
2649  InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
2650 
2651  // The result value is in the lower 32-bits of ResultReg.
2652  // Emit an explicit copy from register class VR128 to register class FR32.
2653  ResultReg = createResultReg(&X86::FR32RegClass);
2654  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2655  TII.get(TargetOpcode::COPY), ResultReg)
2656  .addReg(InputReg, RegState::Kill);
2657  }
2658 
2659  updateValueMap(II, ResultReg);
2660  return true;
2661  }
2662  case Intrinsic::frameaddress: {
2663  MachineFunction *MF = FuncInfo.MF;
2664  if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
2665  return false;
2666 
2667  Type *RetTy = II->getCalledFunction()->getReturnType();
2668 
2669  MVT VT;
2670  if (!isTypeLegal(RetTy, VT))
2671  return false;
2672 
2673  unsigned Opc;
2674  const TargetRegisterClass *RC = nullptr;
2675 
2676  switch (VT.SimpleTy) {
2677  default: llvm_unreachable("Invalid result type for frameaddress.");
2678  case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2679  case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2680  }
2681 
2682  // This needs to be set before we call getPtrSizedFrameRegister, otherwise
2683  // we get the wrong frame register.
2684  MachineFrameInfo &MFI = MF->getFrameInfo();
2685  MFI.setFrameAddressIsTaken(true);
2686 
2687  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2688  unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
2689  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
2690  (FrameReg == X86::EBP && VT == MVT::i32)) &&
2691  "Invalid Frame Register!");
2692 
2693  // Always make a copy of the frame register to a vreg first, so that we
2694  // never directly reference the frame register (the TwoAddressInstruction-
2695  // Pass doesn't like that).
2696  unsigned SrcReg = createResultReg(RC);
2697  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2698  TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2699 
2700  // Now recursively load from the frame address.
2701  // movq (%rbp), %rax
2702  // movq (%rax), %rax
2703  // movq (%rax), %rax
2704  // ...
2705  unsigned DestReg;
2706  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2707  while (Depth--) {
2708  DestReg = createResultReg(RC);
2709  addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2710  TII.get(Opc), DestReg), SrcReg);
2711  SrcReg = DestReg;
2712  }
2713 
2714  updateValueMap(II, SrcReg);
2715  return true;
2716  }
2717  case Intrinsic::memcpy: {
2718  const MemCpyInst *MCI = cast<MemCpyInst>(II);
2719  // Don't handle volatile or variable length memcpys.
2720  if (MCI->isVolatile())
2721  return false;
2722 
2723  if (isa<ConstantInt>(MCI->getLength())) {
2724  // Small memcpy's are common enough that we want to do them
2725  // without a call if possible.
2726  uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
2727  if (IsMemcpySmall(Len)) {
2728  X86AddressMode DestAM, SrcAM;
2729  if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
2730  !X86SelectAddress(MCI->getRawSource(), SrcAM))
2731  return false;
2732  TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2733  return true;
2734  }
2735  }
2736 
2737  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2738  if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
2739  return false;
2740 
2741  if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
2742  return false;
2743 
2744  return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1);
2745  }
2746  case Intrinsic::memset: {
2747  const MemSetInst *MSI = cast<MemSetInst>(II);
2748 
2749  if (MSI->isVolatile())
2750  return false;
2751 
2752  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2753  if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
2754  return false;
2755 
2756  if (MSI->getDestAddressSpace() > 255)
2757  return false;
2758 
2759  return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
2760  }
2761  case Intrinsic::stackprotector: {
2762  // Emit code to store the stack guard onto the stack.
2763  EVT PtrTy = TLI.getPointerTy(DL);
2764 
2765  const Value *Op1 = II->getArgOperand(0); // The guard's value.
2766  const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
2767 
2768  MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
2769 
2770  // Grab the frame index.
2771  X86AddressMode AM;
2772  if (!X86SelectAddress(Slot, AM)) return false;
2773  if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2774  return true;
2775  }
2776  case Intrinsic::dbg_declare: {
2777  const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
2778  X86AddressMode AM;
2779  assert(DI->getAddress() && "Null address should be checked earlier!");
2780  if (!X86SelectAddress(DI->getAddress(), AM))
2781  return false;
2782  const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
2783  // FIXME may need to add RegState::Debug to any registers produced,
2784  // although ESP/EBP should be the only ones at the moment.
2786  "Expected inlined-at fields to agree");
2787  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
2788  .addImm(0)
2789  .addMetadata(DI->getVariable())
2790  .addMetadata(DI->getExpression());
2791  return true;
2792  }
2793  case Intrinsic::trap: {
2794  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
2795  return true;
2796  }
2797  case Intrinsic::sqrt: {
2798  if (!Subtarget->hasSSE1())
2799  return false;
2800 
2801  Type *RetTy = II->getCalledFunction()->getReturnType();
2802 
2803  MVT VT;
2804  if (!isTypeLegal(RetTy, VT))
2805  return false;
2806 
2807  // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
2808  // is not generated by FastISel yet.
2809  // FIXME: Update this code once tablegen can handle it.
2810  static const uint16_t SqrtOpc[3][2] = {
2811  { X86::SQRTSSr, X86::SQRTSDr },
2812  { X86::VSQRTSSr, X86::VSQRTSDr },
2813  { X86::VSQRTSSZr, X86::VSQRTSDZr },
2814  };
2815  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
2816  Subtarget->hasAVX() ? 1 :
2817  0;
2818  unsigned Opc;
2819  switch (VT.SimpleTy) {
2820  default: return false;
2821  case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
2822  case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
2823  }
2824 
2825  const Value *SrcVal = II->getArgOperand(0);
2826  unsigned SrcReg = getRegForValue(SrcVal);
2827 
2828  if (SrcReg == 0)
2829  return false;
2830 
2831  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
2832  unsigned ImplicitDefReg = 0;
2833  if (AVXLevel > 0) {
2834  ImplicitDefReg = createResultReg(RC);
2835  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2836  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2837  }
2838 
2839  unsigned ResultReg = createResultReg(RC);
2840  MachineInstrBuilder MIB;
2841  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2842  ResultReg);
2843 
2844  if (ImplicitDefReg)
2845  MIB.addReg(ImplicitDefReg);
2846 
2847  MIB.addReg(SrcReg);
2848 
2849  updateValueMap(II, ResultReg);
2850  return true;
2851  }
2852  case Intrinsic::sadd_with_overflow:
2853  case Intrinsic::uadd_with_overflow:
2854  case Intrinsic::ssub_with_overflow:
2855  case Intrinsic::usub_with_overflow:
2856  case Intrinsic::smul_with_overflow:
2857  case Intrinsic::umul_with_overflow: {
2858  // This implements the basic lowering of the xalu with overflow intrinsics
2859  // into add/sub/mul followed by either seto or setb.
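  // Illustrative lowering for @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
  // (registers are hypothetical):
  //   addl %esi, %edi       ; first struct element (the sum) in %edi
  //   seto %al              ; second element: the overflow flag via COND_O
  // The unsigned add/sub variants test the carry flag with SETB instead.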
2860  const Function *Callee = II->getCalledFunction();
2861  auto *Ty = cast<StructType>(Callee->getReturnType());
2862  Type *RetTy = Ty->getTypeAtIndex(0U);
2863  assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
2864  Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
2865  "Overflow value expected to be an i1");
2866 
2867  MVT VT;
2868  if (!isTypeLegal(RetTy, VT))
2869  return false;
2870 
2871  if (VT < MVT::i8 || VT > MVT::i64)
2872  return false;
2873 
2874  const Value *LHS = II->getArgOperand(0);
2875  const Value *RHS = II->getArgOperand(1);
2876 
2877  // Canonicalize immediate to the RHS.
2878  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2879  isCommutativeIntrinsic(II))
2880  std::swap(LHS, RHS);
2881 
2882  unsigned BaseOpc, CondCode;
2883  switch (II->getIntrinsicID()) {
2884  default: llvm_unreachable("Unexpected intrinsic!");
2885  case Intrinsic::sadd_with_overflow:
2886  BaseOpc = ISD::ADD; CondCode = X86::COND_O; break;
2887  case Intrinsic::uadd_with_overflow:
2888  BaseOpc = ISD::ADD; CondCode = X86::COND_B; break;
2889  case Intrinsic::ssub_with_overflow:
2890  BaseOpc = ISD::SUB; CondCode = X86::COND_O; break;
2891  case Intrinsic::usub_with_overflow:
2892  BaseOpc = ISD::SUB; CondCode = X86::COND_B; break;
2893  case Intrinsic::smul_with_overflow:
2894  BaseOpc = X86ISD::SMUL; CondCode = X86::COND_O; break;
2895  case Intrinsic::umul_with_overflow:
2896  BaseOpc = X86ISD::UMUL; CondCode = X86::COND_O; break;
2897  }
2898 
2899  unsigned LHSReg = getRegForValue(LHS);
2900  if (LHSReg == 0)
2901  return false;
2902  bool LHSIsKill = hasTrivialKill(LHS);
2903 
2904  unsigned ResultReg = 0;
2905  // Check if we have an immediate version.
2906  if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
2907  static const uint16_t Opc[2][4] = {
2908  { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
2909  { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
2910  };
2911 
2912  if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
2913  CondCode == X86::COND_O) {
2914  // We can use INC/DEC.
2915  ResultReg = createResultReg(TLI.getRegClassFor(VT));
2916  bool IsDec = BaseOpc == ISD::SUB;
2917  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2918  TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
2919  .addReg(LHSReg, getKillRegState(LHSIsKill));
2920  } else
2921  ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
2922  CI->getZExtValue());
2923  }
2924 
2925  unsigned RHSReg;
2926  bool RHSIsKill;
2927  if (!ResultReg) {
2928  RHSReg = getRegForValue(RHS);
2929  if (RHSReg == 0)
2930  return false;
2931  RHSIsKill = hasTrivialKill(RHS);
2932  ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
2933  RHSIsKill);
2934  }
2935 
2936  // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
2937  // it manually.
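  // Sketch of the manual i8 unsigned-multiply path (operands are
  // placeholders):
  //   movb %lhs, %al        ; AL is the implicit input of MUL8r
  //   mulb %rhs             ; product in AX, overflow reported via COND_O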
2938  if (BaseOpc == X86ISD::UMUL && !ResultReg) {
2939  static const uint16_t MULOpc[] =
2940  { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2941  static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
2942  // First copy the first operand into AL/AX/EAX/RAX (depending on the type),
2943  // which is the implicit input to the X86::MUL*r instruction.
2944  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2945  TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2946  .addReg(LHSReg, getKillRegState(LHSIsKill));
2947  ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2948  TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
2949  } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
2950  static const uint16_t MULOpc[] =
2951  { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2952  if (VT == MVT::i8) {
2953  // Copy the first operand into AL, which is an implicit input to the
2954  // X86::IMUL8r instruction.
2955  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2956  TII.get(TargetOpcode::COPY), X86::AL)
2957  .addReg(LHSReg, getKillRegState(LHSIsKill));
2958  ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
2959  RHSIsKill);
2960  } else
2961  ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2962  TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
2963  RHSReg, RHSIsKill);
2964  }
2965 
2966  if (!ResultReg)
2967  return false;
2968 
2969  // Assign to a GPR since the overflow return value is lowered to a SETcc.
2970  unsigned ResultReg2 = createResultReg(&X86::GR8RegClass);
2971  assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2972  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2973  ResultReg2).addImm(CondCode);
2974 
2975  updateValueMap(II, ResultReg, 2);
2976  return true;
2977  }
2978  case Intrinsic::x86_sse_cvttss2si:
2979  case Intrinsic::x86_sse_cvttss2si64:
2980  case Intrinsic::x86_sse2_cvttsd2si:
2981  case Intrinsic::x86_sse2_cvttsd2si64: {
2982  bool IsInputDouble;
2983  switch (II->getIntrinsicID()) {
2984  default: llvm_unreachable("Unexpected intrinsic.");
2985  case Intrinsic::x86_sse_cvttss2si:
2986  case Intrinsic::x86_sse_cvttss2si64:
2987  if (!Subtarget->hasSSE1())
2988  return false;
2989  IsInputDouble = false;
2990  break;
2991  case Intrinsic::x86_sse2_cvttsd2si:
2992  case Intrinsic::x86_sse2_cvttsd2si64:
2993  if (!Subtarget->hasSSE2())
2994  return false;
2995  IsInputDouble = true;
2996  break;
2997  }
2998 
2999  Type *RetTy = II->getCalledFunction()->getReturnType();
3000  MVT VT;
3001  if (!isTypeLegal(RetTy, VT))
3002  return false;
3003 
3004  static const uint16_t CvtOpc[3][2][2] = {
3005  { { X86::CVTTSS2SIrr, X86::CVTTSS2SI64rr },
3006  { X86::CVTTSD2SIrr, X86::CVTTSD2SI64rr } },
3007  { { X86::VCVTTSS2SIrr, X86::VCVTTSS2SI64rr },
3008  { X86::VCVTTSD2SIrr, X86::VCVTTSD2SI64rr } },
3009  { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
3010  { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
3011  };
3012  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
3013  Subtarget->hasAVX() ? 1 :
3014  0;
3015  unsigned Opc;
3016  switch (VT.SimpleTy) {
3017  default: llvm_unreachable("Unexpected result type.");
3018  case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
3019  case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
3020  }
3021 
3022  // Check if we can fold insertelement instructions into the convert.
3023  const Value *Op = II->getArgOperand(0);
3024  while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
3025  const Value *Index = IE->getOperand(2);
3026  if (!isa<ConstantInt>(Index))
3027  break;
3028  unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
3029 
3030  if (Idx == 0) {
3031  Op = IE->getOperand(1);
3032  break;
3033  }
3034  Op = IE->getOperand(0);
3035  }
3036 
3037  unsigned Reg = getRegForValue(Op);
3038  if (Reg == 0)
3039  return false;
3040 
3041  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3042  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3043  .addReg(Reg);
3044 
3045  updateValueMap(II, ResultReg);
3046  return true;
3047  }
3048  }
3049 }
3050 
3051 bool X86FastISel::fastLowerArguments() {
3052  if (!FuncInfo.CanLowerReturn)
3053  return false;
3054 
3055  const Function *F = FuncInfo.Fn;
3056  if (F->isVarArg())
3057  return false;
3058 
3059  CallingConv::ID CC = F->getCallingConv();
3060  if (CC != CallingConv::C)
3061  return false;
3062 
3063  if (Subtarget->isCallingConvWin64(CC))
3064  return false;
3065 
3066  if (!Subtarget->is64Bit())
3067  return false;
3068 
3069  if (Subtarget->useSoftFloat())
3070  return false;
3071 
3072  // Only handle simple cases, i.e. up to 6 i32/i64 scalar arguments.
3073  unsigned GPRCnt = 0;
3074  unsigned FPRCnt = 0;
3075  for (auto const &Arg : F->args()) {
3076  if (Arg.hasAttribute(Attribute::ByVal) ||
3077  Arg.hasAttribute(Attribute::InReg) ||
3078  Arg.hasAttribute(Attribute::StructRet) ||
3079  Arg.hasAttribute(Attribute::SwiftSelf) ||
3080  Arg.hasAttribute(Attribute::SwiftError) ||
3081  Arg.hasAttribute(Attribute::Nest))
3082  return false;
3083 
3084  Type *ArgTy = Arg.getType();
3085  if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3086  return false;
3087 
3088  EVT ArgVT = TLI.getValueType(DL, ArgTy);
3089  if (!ArgVT.isSimple()) return false;
3090  switch (ArgVT.getSimpleVT().SimpleTy) {
3091  default: return false;
3092  case MVT::i32:
3093  case MVT::i64:
3094  ++GPRCnt;
3095  break;
3096  case MVT::f32:
3097  case MVT::f64:
3098  if (!Subtarget->hasSSE1())
3099  return false;
3100  ++FPRCnt;
3101  break;
3102  }
3103 
3104  if (GPRCnt > 6)
3105  return false;
3106 
3107  if (FPRCnt > 8)
3108  return false;
3109  }
3110 
3111  static const MCPhysReg GPR32ArgRegs[] = {
3112  X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
3113  };
3114  static const MCPhysReg GPR64ArgRegs[] = {
3115  X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
3116  };
3117  static const MCPhysReg XMMArgRegs[] = {
3118  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3119  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3120  };
3121 
3122  unsigned GPRIdx = 0;
3123  unsigned FPRIdx = 0;
3124  for (auto const &Arg : F->args()) {
3125  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3126  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
3127  unsigned SrcReg;
3128  switch (VT.SimpleTy) {
3129  default: llvm_unreachable("Unexpected value type.");
3130  case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
3131  case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
3132  case MVT::f32: LLVM_FALLTHROUGH;
3133  case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
3134  }
3135  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3136  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3137  // Without this, EmitLiveInCopies may eliminate the livein if its only
3138  // use is a bitcast (which isn't turned into an instruction).
3139  unsigned ResultReg = createResultReg(RC);
3140  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3141  TII.get(TargetOpcode::COPY), ResultReg)
3142  .addReg(DstReg, getKillRegState(true));
3143  updateValueMap(&Arg, ResultReg);
3144  }
3145  return true;
3146 }
3147 
3148 static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
3149  CallingConv::ID CC,
3150  ImmutableCallSite *CS) {
3151  if (Subtarget->is64Bit())
3152  return 0;
3153  if (Subtarget->getTargetTriple().isOSMSVCRT())
3154  return 0;
3155  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3156  CC == CallingConv::HiPE)
3157  return 0;
3158 
3159  if (CS)
3160  if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
3161  CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
3162  return 0;
3163 
3164  return 4;
3165 }
3166 
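// Illustrative sketch, not part of X86FastISel.cpp: the helper above encodes the
// 32-bit rule that a callee returning through a hidden sret pointer pops that
// pointer itself ("ret $4"), while x86-64, MSVCRT-style environments, MCU targets,
// inreg sret, and the fastcc/GHC/HiPE conventions leave cleanup to the caller.
// A simplified standalone restatement (all flag names hypothetical):

#include <cassert>

struct CallDesc {            // hypothetical, condensed view of the call site
  bool Is64Bit;
  bool IsMSVCRT;
  bool IsMCU;
  bool HasSRetFirstArg;
  bool SRetIsInReg;
  bool IsFastGHCOrHiPE;
};

unsigned bytesPoppedByCalleeForSRet(const CallDesc &C) {
  if (C.Is64Bit || C.IsMSVCRT || C.IsFastGHCOrHiPE)
    return 0;
  if (!C.HasSRetFirstArg || C.SRetIsInReg || C.IsMCU)
    return 0;
  return 4;                  // callee pops the 4-byte sret pointer
}

int main() {
  assert(bytesPoppedByCalleeForSRet({false, false, false, true, false, false}) == 4);
  assert(bytesPoppedByCalleeForSRet({true,  false, false, true, false, false}) == 0);
}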
3167 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3168  auto &OutVals = CLI.OutVals;
3169  auto &OutFlags = CLI.OutFlags;
3170  auto &OutRegs = CLI.OutRegs;
3171  auto &Ins = CLI.Ins;
3172  auto &InRegs = CLI.InRegs;
3173  CallingConv::ID CC = CLI.CallConv;
3174  bool &IsTailCall = CLI.IsTailCall;
3175  bool IsVarArg = CLI.IsVarArg;
3176  const Value *Callee = CLI.Callee;
3177  MCSymbol *Symbol = CLI.Symbol;
3178 
3179  bool Is64Bit = Subtarget->is64Bit();
3180  bool IsWin64 = Subtarget->isCallingConvWin64(CC);
3181 
3182  const CallInst *CI =
3183  CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
3184  const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr;
3185 
3186  // Call / invoke instructions with NoCfCheck attribute require special
3187  // handling.
3188  const auto *II =
3189  CLI.CS ? dyn_cast<InvokeInst>(CLI.CS->getInstruction()) : nullptr;
3190  if ((CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck()))
3191  return false;
3192 
 3193  // Functions with the no_caller_saved_registers attribute need special handling.
3194  if ((CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3195  (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
3196  return false;
3197 
3198  // Functions using retpoline for indirect calls need to use SDISel.
3199  if (Subtarget->useRetpolineIndirectCalls())
3200  return false;
3201 
3202  // Handle only C, fastcc, and webkit_js calling conventions for now.
3203  switch (CC) {
3204  default: return false;
3205  case CallingConv::C:
 3206  case CallingConv::Fast:
 3207  case CallingConv::WebKit_JS:
 3208  case CallingConv::Swift:
 3209  case CallingConv::X86_FastCall:
 3210  case CallingConv::X86_StdCall:
 3211  case CallingConv::X86_ThisCall:
 3212  case CallingConv::Win64:
 3213  case CallingConv::X86_64_SysV:
 3214  break;
3215  }
3216 
3217  // Allow SelectionDAG isel to handle tail calls.
3218  if (IsTailCall)
3219  return false;
3220 
3221  // fastcc with -tailcallopt is intended to provide a guaranteed
3222  // tail call optimization. Fastisel doesn't know how to do that.
3223  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
3224  return false;
3225 
3226  // Don't know how to handle Win64 varargs yet. Nothing special needed for
3227  // x86-32. Special handling for x86-64 is implemented.
3228  if (IsVarArg && IsWin64)
3229  return false;
3230 
3231  // Don't know about inalloca yet.
3232  if (CLI.CS && CLI.CS->hasInAllocaArgument())
3233  return false;
3234 
3235  for (auto Flag : CLI.OutFlags)
3236  if (Flag.isSwiftError())
3237  return false;
3238 
3239  SmallVector<MVT, 16> OutVTs;
3240  SmallVector<unsigned, 16> ArgRegs;
3241 
3242  // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
3243  // instruction. This is safe because it is common to all FastISel supported
3244  // calling conventions on x86.
3245  for (int i = 0, e = OutVals.size(); i != e; ++i) {
3246  Value *&Val = OutVals[i];
3247  ISD::ArgFlagsTy Flags = OutFlags[i];
3248  if (auto *CI = dyn_cast<ConstantInt>(Val)) {
3249  if (CI->getBitWidth() < 32) {
 3250  if (Flags.isSExt())
 3251  Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
 3252  else
 3253  Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
 3254  }
3255  }
3256 
3257  // Passing bools around ends up doing a trunc to i1 and passing it.
3258  // Codegen this as an argument + "and 1".
3259  MVT VT;
3260  auto *TI = dyn_cast<TruncInst>(Val);
3261  unsigned ResultReg;
3262  if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
3263  (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
3264  TI->hasOneUse()) {
3265  Value *PrevVal = TI->getOperand(0);
3266  ResultReg = getRegForValue(PrevVal);
3267 
3268  if (!ResultReg)
3269  return false;
3270 
3271  if (!isTypeLegal(PrevVal->getType(), VT))
3272  return false;
3273 
3274  ResultReg =
3275  fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
3276  } else {
3277  if (!isTypeLegal(Val->getType(), VT))
3278  return false;
3279  ResultReg = getRegForValue(Val);
3280  }
3281 
3282  if (!ResultReg)
3283  return false;
3284 
3285  ArgRegs.push_back(ResultReg);
3286  OutVTs.push_back(VT);
3287  }
3288 
 3289  // Analyze operands of the call, assigning locations to each operand.
 3290  SmallVector<CCValAssign, 16> ArgLocs;
 3291  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
3292 
3293  // Allocate shadow area for Win64
3294  if (IsWin64)
3295  CCInfo.AllocateStack(32, 8);
3296 
3297  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
3298 
3299  // Get a count of how many bytes are to be pushed on the stack.
3300  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3301 
3302  // Issue CALLSEQ_START
3303  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3304  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3305  .addImm(NumBytes).addImm(0).addImm(0);
3306 
3307  // Walk the register/memloc assignments, inserting copies/loads.
3308  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3309  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3310  CCValAssign const &VA = ArgLocs[i];
3311  const Value *ArgVal = OutVals[VA.getValNo()];
3312  MVT ArgVT = OutVTs[VA.getValNo()];
3313 
3314  if (ArgVT == MVT::x86mmx)
3315  return false;
3316 
3317  unsigned ArgReg = ArgRegs[VA.getValNo()];
3318 
3319  // Promote the value if needed.
3320  switch (VA.getLocInfo()) {
3321  case CCValAssign::Full: break;
3322  case CCValAssign::SExt: {
3323  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3324  "Unexpected extend");
3325 
3326  if (ArgVT == MVT::i1)
3327  return false;
3328 
3329  bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3330  ArgVT, ArgReg);
3331  assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
3332  ArgVT = VA.getLocVT();
3333  break;
3334  }
3335  case CCValAssign::ZExt: {
3336  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3337  "Unexpected extend");
3338 
3339  // Handle zero-extension from i1 to i8, which is common.
3340  if (ArgVT == MVT::i1) {
3341  // Set the high bits to zero.
3342  ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
3343  ArgVT = MVT::i8;
3344 
3345  if (ArgReg == 0)
3346  return false;
3347  }
3348 
3349  bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3350  ArgVT, ArgReg);
3351  assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
3352  ArgVT = VA.getLocVT();
3353  break;
3354  }
3355  case CCValAssign::AExt: {
3356  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3357  "Unexpected extend");
3358  bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
3359  ArgVT, ArgReg);
3360  if (!Emitted)
3361  Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3362  ArgVT, ArgReg);
3363  if (!Emitted)
3364  Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3365  ArgVT, ArgReg);
3366 
 3367  assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
3368  ArgVT = VA.getLocVT();
3369  break;
3370  }
3371  case CCValAssign::BCvt: {
3372  ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
3373  /*TODO: Kill=*/false);
3374  assert(ArgReg && "Failed to emit a bitcast!");
3375  ArgVT = VA.getLocVT();
3376  break;
3377  }
3378  case CCValAssign::VExt:
3379  // VExt has not been implemented, so this should be impossible to reach
 3380  // for now. However, fall back to SelectionDAG isel once implemented.
3381  return false;
3385  case CCValAssign::FPExt:
3386  llvm_unreachable("Unexpected loc info!");
3387  case CCValAssign::Indirect:
3388  // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
3389  // support this.
3390  return false;
3391  }
3392 
3393  if (VA.isRegLoc()) {
3394  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3395  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3396  OutRegs.push_back(VA.getLocReg());
3397  } else {
3398  assert(VA.isMemLoc());
3399 
3400  // Don't emit stores for undef values.
3401  if (isa<UndefValue>(ArgVal))
3402  continue;
3403 
3404  unsigned LocMemOffset = VA.getLocMemOffset();
3405  X86AddressMode AM;
3406  AM.Base.Reg = RegInfo->getStackRegister();
3407  AM.Disp = LocMemOffset;
3408  ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
3409  unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3410  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3411  MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
3412  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3413  if (Flags.isByVal()) {
3414  X86AddressMode SrcAM;
3415  SrcAM.Base.Reg = ArgReg;
3416  if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
3417  return false;
3418  } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
3419  // If this is a really simple value, emit this with the Value* version
3420  // of X86FastEmitStore. If it isn't simple, we don't want to do this,
3421  // as it can cause us to reevaluate the argument.
3422  if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
3423  return false;
3424  } else {
3425  bool ValIsKill = hasTrivialKill(ArgVal);
3426  if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
3427  return false;
3428  }
3429  }
3430  }
3431 
 3432  // ELF / PIC requires the GOT pointer to be in the EBX register before
 3433  // making function calls via the PLT.
3434  if (Subtarget->isPICStyleGOT()) {
3435  unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3436  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3437  TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
3438  }
3439 
3440  if (Is64Bit && IsVarArg && !IsWin64) {
3441  // From AMD64 ABI document:
3442  // For calls that may call functions that use varargs or stdargs
3443  // (prototype-less calls or calls to functions containing ellipsis (...) in
3444  // the declaration) %al is used as hidden argument to specify the number
3445  // of SSE registers used. The contents of %al do not need to match exactly
 3446  // the number of registers, but must be an upper bound on the number of SSE
3447  // registers used and is in the range 0 - 8 inclusive.
3448 
3449  // Count the number of XMM registers allocated.
3450  static const MCPhysReg XMMArgRegs[] = {
3451  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3452  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3453  };
3454  unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3455  assert((Subtarget->hasSSE1() || !NumXMMRegs)
3456  && "SSE registers cannot be used when SSE is disabled");
3457  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
3458  X86::AL).addImm(NumXMMRegs);
3459  }
3460 
3461  // Materialize callee address in a register. FIXME: GV address can be
3462  // handled with a CALLpcrel32 instead.
3463  X86AddressMode CalleeAM;
3464  if (!X86SelectCallAddress(Callee, CalleeAM))
3465  return false;
3466 
3467  unsigned CalleeOp = 0;
3468  const GlobalValue *GV = nullptr;
3469  if (CalleeAM.GV != nullptr) {
3470  GV = CalleeAM.GV;
3471  } else if (CalleeAM.Base.Reg != 0) {
3472  CalleeOp = CalleeAM.Base.Reg;
3473  } else
3474  return false;
3475 
3476  // Issue the call.
3477  MachineInstrBuilder MIB;
3478  if (CalleeOp) {
3479  // Register-indirect call.
3480  unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
3481  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
3482  .addReg(CalleeOp);
3483  } else {
3484  // Direct call.
3485  assert(GV && "Not a direct call");
3486  // See if we need any target-specific flags on the GV operand.
3487  unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
3488 
3489  // This will be a direct call, or an indirect call through memory for
3490  // NonLazyBind calls or dllimport calls.
3491  bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT ||
3492  OpFlags == X86II::MO_GOTPCREL ||
3493  OpFlags == X86II::MO_COFFSTUB;
3494  unsigned CallOpc = NeedLoad
3495  ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
3496  : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
3497 
3498  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
3499  if (NeedLoad)
3500  MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
3501  if (Symbol)
3502  MIB.addSym(Symbol, OpFlags);
3503  else
3504  MIB.addGlobalAddress(GV, 0, OpFlags);
3505  if (NeedLoad)
3506  MIB.addReg(0);
3507  }
3508 
3509  // Add a register mask operand representing the call-preserved registers.
3510  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3511  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3512 
 3513  // Add an implicit use of the GOT pointer in EBX.
 3514  if (Subtarget->isPICStyleGOT())
 3515  MIB.addReg(X86::EBX, RegState::Implicit);
 3516 
 3517  if (Is64Bit && IsVarArg && !IsWin64)
 3518  MIB.addReg(X86::AL, RegState::Implicit);
 3519 
 3520  // Add implicit physical register uses to the call.
 3521  for (auto Reg : OutRegs)
 3522  MIB.addReg(Reg, RegState::Implicit);
 3523 
3524  // Issue CALLSEQ_END
3525  unsigned NumBytesForCalleeToPop =
3526  X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
3527  TM.Options.GuaranteedTailCallOpt)
3528  ? NumBytes // Callee pops everything.
3529  : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CS);
3530  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3531  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3532  .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
3533 
 3534  // Now handle call return values.
 3535  SmallVector<CCValAssign, 16> RVLocs;
 3536  CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
3537  CLI.RetTy->getContext());
3538  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
3539 
3540  // Copy all of the result registers out of their specified physreg.
3541  unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3542  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3543  CCValAssign &VA = RVLocs[i];
3544  EVT CopyVT = VA.getValVT();
3545  unsigned CopyReg = ResultReg + i;
3546  unsigned SrcReg = VA.getLocReg();
3547 
3548  // If this is x86-64, and we disabled SSE, we can't return FP values
3549  if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
3550  ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
3551  report_fatal_error("SSE register return with SSE disabled");
3552  }
3553 
3554  // If we prefer to use the value in xmm registers, copy it out as f80 and
3555  // use a truncate to move it from fp stack reg to xmm reg.
3556  if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
3557  isScalarFPTypeInSSEReg(VA.getValVT())) {
3558  CopyVT = MVT::f80;
3559  CopyReg = createResultReg(&X86::RFP80RegClass);
3560  }
3561 
3562  // Copy out the result.
3563  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3564  TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
3565  InRegs.push_back(VA.getLocReg());
3566 
3567  // Round the f80 to the right size, which also moves it to the appropriate
3568  // xmm register. This is accomplished by storing the f80 value in memory
3569  // and then loading it back.
3570  if (CopyVT != VA.getValVT()) {
3571  EVT ResVT = VA.getValVT();
3572  unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3573  unsigned MemSize = ResVT.getSizeInBits()/8;
3574  int FI = MFI.CreateStackObject(MemSize, MemSize, false);
3575  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3576  TII.get(Opc)), FI)
3577  .addReg(CopyReg);
3578  Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
3579  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3580  TII.get(Opc), ResultReg + i), FI);
3581  }
3582  }
3583 
3584  CLI.ResultReg = ResultReg;
3585  CLI.NumResultRegs = RVLocs.size();
3586  CLI.Call = MIB;
3587 
3588  return true;
3589 }
3590 
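// Illustrative sketch, not part of X86FastISel.cpp: for a varargs call on
// non-Windows x86-64, one step of fastLowerCall above materializes the hidden %al
// argument required by the AMD64 ABI, an upper bound (0..8) on the number of XMM
// registers carrying arguments. A minimal standalone model of that value:

#include <algorithm>
#include <cassert>

// NumXMMArgsUsed plays the role of CCInfo.getFirstUnallocated(XMMArgRegs) in the
// real code; any value that is at least the true count (and at most 8) is a valid %al.
unsigned hiddenALForVarArgs(unsigned NumXMMArgsUsed, bool HasSSE) {
  assert((HasSSE || NumXMMArgsUsed == 0) &&
         "SSE registers cannot be used when SSE is disabled");
  return std::min(NumXMMArgsUsed, 8u);
}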
3591 bool
3592 X86FastISel::fastSelectInstruction(const Instruction *I) {
3593  switch (I->getOpcode()) {
3594  default: break;
3595  case Instruction::Load:
3596  return X86SelectLoad(I);
3597  case Instruction::Store:
3598  return X86SelectStore(I);
3599  case Instruction::Ret:
3600  return X86SelectRet(I);
3601  case Instruction::ICmp:
3602  case Instruction::FCmp:
3603  return X86SelectCmp(I);
3604  case Instruction::ZExt:
3605  return X86SelectZExt(I);
3606  case Instruction::SExt:
3607  return X86SelectSExt(I);
3608  case Instruction::Br:
3609  return X86SelectBranch(I);
3610  case Instruction::LShr:
3611  case Instruction::AShr:
3612  case Instruction::Shl:
3613  return X86SelectShift(I);
3614  case Instruction::SDiv:
3615  case Instruction::UDiv:
3616  case Instruction::SRem:
3617  case Instruction::URem:
3618  return X86SelectDivRem(I);
3619  case Instruction::Select:
3620  return X86SelectSelect(I);
3621  case Instruction::Trunc:
3622  return X86SelectTrunc(I);
3623  case Instruction::FPExt:
3624  return X86SelectFPExt(I);
3625  case Instruction::FPTrunc:
3626  return X86SelectFPTrunc(I);
3627  case Instruction::SIToFP:
3628  return X86SelectSIToFP(I);
3629  case Instruction::UIToFP:
3630  return X86SelectUIToFP(I);
3631  case Instruction::IntToPtr: // Deliberate fall-through.
3632  case Instruction::PtrToInt: {
3633  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3634  EVT DstVT = TLI.getValueType(DL, I->getType());
3635  if (DstVT.bitsGT(SrcVT))
3636  return X86SelectZExt(I);
3637  if (DstVT.bitsLT(SrcVT))
3638  return X86SelectTrunc(I);
3639  unsigned Reg = getRegForValue(I->getOperand(0));
3640  if (Reg == 0) return false;
3641  updateValueMap(I, Reg);
3642  return true;
3643  }
3644  case Instruction::BitCast: {
3645  // Select SSE2/AVX bitcasts between 128/256 bit vector types.
3646  if (!Subtarget->hasSSE2())
3647  return false;
3648 
3649  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3650  EVT DstVT = TLI.getValueType(DL, I->getType());
3651 
3652  if (!SrcVT.isSimple() || !DstVT.isSimple())
3653  return false;
3654 
3655  MVT SVT = SrcVT.getSimpleVT();
3656  MVT DVT = DstVT.getSimpleVT();
3657 
3658  if (!SVT.is128BitVector() &&
3659  !(Subtarget->hasAVX() && SVT.is256BitVector()) &&
3660  !(Subtarget->hasAVX512() && SVT.is512BitVector() &&
3661  (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 &&
3662  DVT.getScalarSizeInBits() >= 32))))
3663  return false;
3664 
3665  unsigned Reg = getRegForValue(I->getOperand(0));
3666  if (Reg == 0)
3667  return false;
3668 
3669  // No instruction is needed for conversion. Reuse the register used by
 3670  // the first operand.
3671  updateValueMap(I, Reg);
3672  return true;
3673  }
3674  }
3675 
3676  return false;
3677 }
3678 
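// Illustrative sketch, not part of X86FastISel.cpp: the IntToPtr/PtrToInt case in
// fastSelectInstruction above reduces pointer/integer casts to a size comparison:
// widen with a zero-extend, narrow with a truncate, or simply reuse the source
// register when the widths already match. The decision, restated on plain bit
// widths with hypothetical names:

enum class CastLowering { ZExt, Trunc, ReuseRegister };

CastLowering lowerIntPtrCast(unsigned SrcBits, unsigned DstBits) {
  if (DstBits > SrcBits)
    return CastLowering::ZExt;        // handled like a zext
  if (DstBits < SrcBits)
    return CastLowering::Trunc;       // handled like a trunc
  return CastLowering::ReuseRegister; // no instruction needed
}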
3679 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
3680  if (VT > MVT::i64)
3681  return 0;
3682 
3683  uint64_t Imm = CI->getZExtValue();
3684  if (Imm == 0) {
3685  unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
3686  switch (VT.SimpleTy) {
3687  default: llvm_unreachable("Unexpected value type");
3688  case MVT::i1:
3689  case MVT::i8:
3690  return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
3691  X86::sub_8bit);
3692  case MVT::i16:
3693  return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
3694  X86::sub_16bit);
3695  case MVT::i32:
3696  return SrcReg;
3697  case MVT::i64: {
3698  unsigned ResultReg = createResultReg(&X86::GR64RegClass);
3699  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3700  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3701  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3702  return ResultReg;
3703  }
3704  }
3705  }
3706 
3707  unsigned Opc = 0;
3708  switch (VT.SimpleTy) {
3709  default: llvm_unreachable("Unexpected value type");
 3710  case MVT::i1:
 3711  VT = MVT::i8;
 3712  LLVM_FALLTHROUGH;
 3713  case MVT::i8: Opc = X86::MOV8ri; break;
3714  case MVT::i16: Opc = X86::MOV16ri; break;
3715  case MVT::i32: Opc = X86::MOV32ri; break;
3716  case MVT::i64: {
3717  if (isUInt<32>(Imm))
3718  Opc = X86::MOV32ri64;
3719  else if (isInt<32>(Imm))
3720  Opc = X86::MOV64ri32;
3721  else
3722  Opc = X86::MOV64ri;
3723  break;
3724  }
3725  }
3726  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
3727 }
3728 
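// Illustrative sketch, not part of X86FastISel.cpp: for a nonzero i64 constant the
// function above picks the cheapest move encoding by range: a 32-bit zero-extending
// move when the value fits in an unsigned 32-bit immediate, a sign-extended 32-bit
// immediate when it fits signed, and a full 10-byte movabs otherwise. The same
// selection on a raw value, returning the opcode name for illustration:

#include <cstdint>
#include <limits>

const char *movOpcodeForImm64(uint64_t Imm) {
  if (Imm <= std::numeric_limits<uint32_t>::max())
    return "MOV32ri64";   // zero-extends into the full 64-bit register
  if (static_cast<int64_t>(Imm) >= std::numeric_limits<int32_t>::min() &&
      static_cast<int64_t>(Imm) <= std::numeric_limits<int32_t>::max())
    return "MOV64ri32";   // sign-extended 32-bit immediate
  return "MOV64ri";       // full 64-bit immediate (movabs)
}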
3729 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
3730  if (CFP->isNullValue())
3731  return fastMaterializeFloatZero(CFP);
3732 
3733  // Can't handle alternate code models yet.
3734  CodeModel::Model CM = TM.getCodeModel();
3735  if (CM != CodeModel::Small && CM != CodeModel::Large)
3736  return 0;
3737 
3738  // Get opcode and regclass of the output for the given load instruction.
3739  unsigned Opc = 0;
3740  bool HasAVX = Subtarget->hasAVX();
3741  bool HasAVX512 = Subtarget->hasAVX512();
3742  switch (VT.SimpleTy) {
3743  default: return 0;
3744  case MVT::f32:
3745  if (X86ScalarSSEf32)
3746  Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
3747  else
3748  Opc = X86::LD_Fp32m;
3749  break;
3750  case MVT::f64:
3751  if (X86ScalarSSEf64)
3752  Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
3753  else
3754  Opc = X86::LD_Fp64m;
3755  break;
3756  case MVT::f80:
3757  // No f80 support yet.
3758  return 0;
3759  }
3760 
3761  // MachineConstantPool wants an explicit alignment.
3762  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
3763  if (Align == 0) {
3764  // Alignment of vector types. FIXME!
3765  Align = DL.getTypeAllocSize(CFP->getType());
3766  }
3767 
3768  // x86-32 PIC requires a PIC base register for constant pools.
3769  unsigned PICBase = 0;
3770  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
3771  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
3772  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3773  else if (OpFlag == X86II::MO_GOTOFF)
3774  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3775  else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
3776  PICBase = X86::RIP;
3777 
3778  // Create the load from the constant pool.
3779  unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
3780  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
3781 
3782  if (CM == CodeModel::Large) {
3783  unsigned AddrReg = createResultReg(&X86::GR64RegClass);
3784  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3785  AddrReg)
3786  .addConstantPoolIndex(CPI, 0, OpFlag);
3787  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3788  TII.get(Opc), ResultReg);
3789  addDirectMem(MIB, AddrReg);
 3790  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
 3791  MachinePointerInfo::getConstantPool(*FuncInfo.MF),
 3792  MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
3793  MIB->addMemOperand(*FuncInfo.MF, MMO);
3794  return ResultReg;
3795  }
3796 
3797  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3798  TII.get(Opc), ResultReg),
3799  CPI, PICBase, OpFlag);
3800  return ResultReg;
3801 }
3802 
3803 unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
3804  // Can't handle alternate code models yet.
3805  if (TM.getCodeModel() != CodeModel::Small)
3806  return 0;
3807 
3808  // Materialize addresses with LEA/MOV instructions.
3809  X86AddressMode AM;
3810  if (X86SelectAddress(GV, AM)) {
3811  // If the expression is just a basereg, then we're done, otherwise we need
3812  // to emit an LEA.
3813  if (AM.BaseType == X86AddressMode::RegBase &&
3814  AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
3815  return AM.Base.Reg;
3816 
3817  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3818  if (TM.getRelocationModel() == Reloc::Static &&
3819  TLI.getPointerTy(DL) == MVT::i64) {
 3820  // The displacement could be more than 32 bits away, so we need to use
 3821  // an instruction with a 64-bit immediate.
3822  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3823  ResultReg)
3824  .addGlobalAddress(GV);
3825  } else {
3826  unsigned Opc =
3827  TLI.getPointerTy(DL) == MVT::i32
3828  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3829  : X86::LEA64r;
3830  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3831  TII.get(Opc), ResultReg), AM);
3832  }
3833  return ResultReg;
3834  }
3835  return 0;
3836 }
3837 
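// Illustrative sketch, not part of X86FastISel.cpp: X86MaterializeGV above turns a
// global's address into a register, either with an LEA against the selected address
// mode or, for the static relocation model with 64-bit pointers, with a 64-bit
// immediate move. A simple source-level example of the pattern being lowered
// (GlobalCounter is a hypothetical global):

int GlobalCounter = 0;

int *takeAddress() {
  // Under PIC with the small code model this typically becomes a RIP-relative
  // "leaq GlobalCounter(%rip), %rax"; with static relocations on x86-64 this
  // path may instead materialize the address with "movabsq $GlobalCounter, %rax".
  return &GlobalCounter;
}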
3838 unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
3839  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
3840 
3841  // Only handle simple types.
3842  if (!CEVT.isSimple())
3843  return 0;
3844  MVT VT = CEVT.getSimpleVT();
3845 
3846  if (const auto *CI = dyn_cast<ConstantInt>(C))
3847  return X86MaterializeInt(CI, VT);
3848  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
3849  return X86MaterializeFP(CFP, VT);
3850  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
3851  return X86MaterializeGV(GV, VT);
3852 
3853  return 0;
3854 }
3855 
3856 unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
3857  // Fail on dynamic allocas. At this point, getRegForValue has already
3858  // checked its CSE maps, so if we're here trying to handle a dynamic
3859  // alloca, we're not going to succeed. X86SelectAddress has a
3860  // check for dynamic allocas, because it's called directly from
3861  // various places, but targetMaterializeAlloca also needs a check
3862  // in order to avoid recursion between getRegForValue,
 3863  // X86SelectAddress, and targetMaterializeAlloca.
3864  if (!FuncInfo.StaticAllocaMap.count(C))
3865  return 0;
3866  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
3867 
3868  X86AddressMode AM;
3869  if (!X86SelectAddress(C, AM))
3870  return 0;
3871  unsigned Opc =
3872  TLI.getPointerTy(DL) == MVT::i32
3873  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3874  : X86::LEA64r;
3875  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
3876  unsigned ResultReg = createResultReg(RC);
3877  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3878  TII.get(Opc), ResultReg), AM);
3879  return ResultReg;
3880 }
3881 
3882 unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
3883  MVT VT;
3884  if (!isTypeLegal(CF->getType(), VT))
3885  return 0;
3886 
3887  // Get opcode and regclass for the given zero.
3888  bool HasAVX512 = Subtarget->hasAVX512();
3889  unsigned Opc = 0;
3890  switch (VT.SimpleTy) {
3891  default: return 0;
3892  case MVT::f32:
3893  if (X86ScalarSSEf32)
3894  Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS;
3895  else
3896  Opc = X86::LD_Fp032;
3897  break;
3898  case MVT::f64:
3899  if (X86ScalarSSEf64)
3900  Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD;
3901  else
3902  Opc = X86::LD_Fp064;
3903  break;
3904  case MVT::f80:
3905  // No f80 support yet.
3906  return 0;
3907  }
3908 
3909  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3910  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
3911  return ResultReg;
3912 }
3913 
3914 
3915 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
3916  const LoadInst *LI) {
3917  const Value *Ptr = LI->getPointerOperand();
3918  X86AddressMode AM;
3919  if (!X86SelectAddress(Ptr, AM))
3920  return false;
3921 
3922  const X86InstrInfo &XII = (const X86InstrInfo &)TII;
3923 
3924  unsigned Size = DL.getTypeAllocSize(LI->getType());
3925  unsigned Alignment = LI->getAlignment();
3926 
3927  if (Alignment == 0) // Ensure that codegen never sees alignment 0
3928  Alignment = DL.getABITypeAlignment(LI->getType());
 3929 
 3930  SmallVector<MachineOperand, 8> AddrOps;
 3931  AM.getFullAddress(AddrOps);
3932 
3933  MachineInstr *Result = XII.foldMemoryOperandImpl(
3934  *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
3935  /*AllowCommute=*/true);
3936  if (!Result)
3937  return false;
3938 
3939  // The index register could be in the wrong register class. Unfortunately,
 3940  // foldMemoryOperandImpl could have commuted the instruction, so it's not enough
 3941  // to just look at OpNo + the offset to the index reg. We actually need to
 3942  // scan the instruction to find the index reg and see if it's in the correct reg
3943  // class.
3944  unsigned OperandNo = 0;
3945  for (MachineInstr::mop_iterator I = Result->operands_begin(),
3946  E = Result->operands_end(); I != E; ++I, ++OperandNo) {
3947  MachineOperand &MO = *I;
3948  if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
3949  continue;
3950  // Found the index reg, now try to rewrite it.
3951  unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
3952  MO.getReg(), OperandNo);
3953  if (IndexReg == MO.getReg())
3954  continue;
3955  MO.setReg(IndexReg);
3956  }
3957 
3958  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
 3959  Result->cloneInstrSymbols(*FuncInfo.MF, *MI);
 3960  MachineBasicBlock::iterator I(MI);
 3961  removeDeadCode(I, std::next(I));
3962  return true;
3963 }
3964 
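// Illustrative sketch, not part of X86FastISel.cpp: tryToFoldLoadIntoMI above lets a
// selected load become the memory operand of its single user, so IR like the
// function below can be emitted as one load-and-add instruction (for example
// "addl (%rdi), %esi") rather than a separate load plus a register-register add.
// The exact assembly depends on the target and on register allocation.

int addThroughPointer(const int *P, int X) {
  return X + *P; // candidate for folding the load into the add
}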
3965 unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
3966  const TargetRegisterClass *RC,
3967  unsigned Op0, bool Op0IsKill,
3968  unsigned Op1, bool Op1IsKill,
3969  unsigned Op2, bool Op2IsKill,
3970  unsigned Op3, bool Op3IsKill) {
3971  const MCInstrDesc &II = TII.get(MachineInstOpcode);
3972 
3973  unsigned ResultReg = createResultReg(RC);
3974  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
3975  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
3976  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
3977  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
3978 
3979  if (II.getNumDefs() >= 1)
3980  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
3981  .addReg(Op0, getKillRegState(Op0IsKill))
3982  .addReg(Op1, getKillRegState(Op1IsKill))
3983  .addReg(Op2, getKillRegState(Op2IsKill))
3984  .addReg(Op3, getKillRegState(Op3IsKill));
3985  else {
3986  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
3987  .addReg(Op0, getKillRegState(Op0IsKill))
3988  .addReg(Op1, getKillRegState(Op1IsKill))
3989  .addReg(Op2, getKillRegState(Op2IsKill))
3990  .addReg(Op3, getKillRegState(Op3IsKill));
3991  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3992  TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
3993  }
3994  return ResultReg;
3995 }
3996 
3997 
 3998 namespace llvm {
 3999  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
 4000  const TargetLibraryInfo *libInfo) {
4001  return new X86FastISel(funcInfo, libInfo);
4002  }
4003 }