X86FastISel.cpp
1 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the X86-specific support for the FastISel class. Much
10 // of the target-specific code is generated by tablegen in the file
11 // X86GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86CallingConv.h"
17 #include "X86InstrBuilder.h"
18 #include "X86InstrInfo.h"
19 #include "X86MachineFunctionInfo.h"
20 #include "X86RegisterInfo.h"
21 #include "X86Subtarget.h"
22 #include "X86TargetMachine.h"
23 #include "llvm/Analysis/BranchProbabilityInfo.h"
24 #include "llvm/CodeGen/FastISel.h"
25 #include "llvm/CodeGen/FunctionLoweringInfo.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFrameInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/IR/CallingConv.h"
30 #include "llvm/IR/DebugInfo.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/GetElementPtrTypeIterator.h"
33 #include "llvm/IR/GlobalAlias.h"
34 #include "llvm/IR/GlobalVariable.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/IntrinsicsX86.h"
38 #include "llvm/IR/Operator.h"
39 #include "llvm/MC/MCAsmInfo.h"
40 #include "llvm/MC/MCSymbol.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Target/TargetOptions.h"
43 using namespace llvm;
44 
45 namespace {
46 
47 class X86FastISel final : public FastISel {
48  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
49  /// make the right decision when generating code for different targets.
50  const X86Subtarget *Subtarget;
51 
52 public:
53  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
54  const TargetLibraryInfo *libInfo)
55  : FastISel(funcInfo, libInfo) {
56  Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
57  }
58 
59  bool fastSelectInstruction(const Instruction *I) override;
60 
61  /// The specified machine instr operand is a vreg, and that
62  /// vreg is being provided by the specified load instruction. If possible,
63  /// try to fold the load as an operand to the instruction, returning true on
64  /// success.
65  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
66  const LoadInst *LI) override;
67 
68  bool fastLowerArguments() override;
69  bool fastLowerCall(CallLoweringInfo &CLI) override;
70  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
71 
72 #include "X86GenFastISel.inc"
73 
74 private:
75  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
76  const DebugLoc &DL);
77 
78  bool X86FastEmitLoad(MVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
79  unsigned &ResultReg, unsigned Alignment = 1);
80 
81  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
82  MachineMemOperand *MMO = nullptr, bool Aligned = false);
83  bool X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM,
84  MachineMemOperand *MMO = nullptr, bool Aligned = false);
85 
86  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
87  unsigned &ResultReg);
88 
89  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
90  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
91 
92  bool X86SelectLoad(const Instruction *I);
93 
94  bool X86SelectStore(const Instruction *I);
95 
96  bool X86SelectRet(const Instruction *I);
97 
98  bool X86SelectCmp(const Instruction *I);
99 
100  bool X86SelectZExt(const Instruction *I);
101 
102  bool X86SelectSExt(const Instruction *I);
103 
104  bool X86SelectBranch(const Instruction *I);
105 
106  bool X86SelectShift(const Instruction *I);
107 
108  bool X86SelectDivRem(const Instruction *I);
109 
110  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
111 
112  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
113 
114  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
115 
116  bool X86SelectSelect(const Instruction *I);
117 
118  bool X86SelectTrunc(const Instruction *I);
119 
120  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
121  const TargetRegisterClass *RC);
122 
123  bool X86SelectFPExt(const Instruction *I);
124  bool X86SelectFPTrunc(const Instruction *I);
125  bool X86SelectSIToFP(const Instruction *I);
126  bool X86SelectUIToFP(const Instruction *I);
127  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);
128 
129  const X86InstrInfo *getInstrInfo() const {
130  return Subtarget->getInstrInfo();
131  }
132  const X86TargetMachine *getTargetMachine() const {
133  return static_cast<const X86TargetMachine *>(&TM);
134  }
135 
136  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
137 
138  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
139  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
140  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
141  unsigned fastMaterializeConstant(const Constant *C) override;
142 
143  unsigned fastMaterializeAlloca(const AllocaInst *C) override;
144 
145  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
146 
147  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
148  /// computed in an SSE register, not on the X87 floating point stack.
149  bool isScalarFPTypeInSSEReg(EVT VT) const {
150  return (VT == MVT::f64 && Subtarget->hasSSE2()) ||
151  (VT == MVT::f32 && Subtarget->hasSSE1()) ||
152  (VT == MVT::f16 && Subtarget->hasFP16());
153  }
154 
155  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
156 
157  bool IsMemcpySmall(uint64_t Len);
158 
159  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
160  X86AddressMode SrcAM, uint64_t Len);
161 
162  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
163  const Value *Cond);
164 
165  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
166  X86AddressMode &AM);
167 
168  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
169  const TargetRegisterClass *RC, unsigned Op0,
170  unsigned Op1, unsigned Op2, unsigned Op3);
171 };
172 
173 } // end anonymous namespace.
174 
175 static std::pair<unsigned, bool>
176 getX86SSEConditionCode(CmpInst::Predicate Predicate) {
177  unsigned CC;
178  bool NeedSwap = false;
179 
180  // SSE Condition code mapping:
181  // 0 - EQ
182  // 1 - LT
183  // 2 - LE
184  // 3 - UNORD
185  // 4 - NEQ
186  // 5 - NLT
187  // 6 - NLE
188  // 7 - ORD
189  switch (Predicate) {
190  default: llvm_unreachable("Unexpected predicate");
191  case CmpInst::FCMP_OEQ: CC = 0; break;
192  case CmpInst::FCMP_OGT: NeedSwap = true; LLVM_FALLTHROUGH;
193  case CmpInst::FCMP_OLT: CC = 1; break;
194  case CmpInst::FCMP_OGE: NeedSwap = true; LLVM_FALLTHROUGH;
195  case CmpInst::FCMP_OLE: CC = 2; break;
196  case CmpInst::FCMP_UNO: CC = 3; break;
197  case CmpInst::FCMP_UNE: CC = 4; break;
198  case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH;
199  case CmpInst::FCMP_UGE: CC = 5; break;
200  case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
201  case CmpInst::FCMP_UGT: CC = 6; break;
202  case CmpInst::FCMP_ORD: CC = 7; break;
203  case CmpInst::FCMP_UEQ: CC = 8; break;
204  case CmpInst::FCMP_ONE: CC = 12; break;
205  }
206 
207  return std::make_pair(CC, NeedSwap);
208 }
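// Illustrative sketch of how the pair above is typically consumed (CmpLHS and
// CmpRHS are placeholder operand values): when NeedSwap is set the caller
// swaps the operands, and CC becomes the immediate of a CMPSS/CMPSD-style
// compare.
//
//   unsigned CC;
//   bool NeedSwap;
//   std::tie(CC, NeedSwap) = getX86SSEConditionCode(CmpInst::FCMP_OGT);
//   // FCMP_OGT maps to {1 /*LT*/, NeedSwap = true}.
//   if (NeedSwap)
//     std::swap(CmpLHS, CmpRHS);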
209 
210 /// Adds a complex addressing mode to the given machine instr builder.
211 /// Note, this will constrain the index register. If it is not possible to
212 /// constrain the given index register, then a new one will be created. The
213 /// IndexReg field of the addressing mode will be updated to match in this case.
214 const MachineInstrBuilder &
215 X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
216  X86AddressMode &AM) {
217  // First constrain the index register. It needs to be a GR64_NOSP.
218  AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
219  MIB->getNumOperands() +
220  X86::AddrIndexReg);
221  return ::addFullAddress(MIB, AM);
222 }
223 
224 /// Check if it is possible to fold the condition from the XALU intrinsic
225 /// into the user. The condition code will only be updated on success.
226 bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
227  const Value *Cond) {
228  if (!isa<ExtractValueInst>(Cond))
229  return false;
230 
231  const auto *EV = cast<ExtractValueInst>(Cond);
232  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
233  return false;
234 
235  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
236  MVT RetVT;
237  const Function *Callee = II->getCalledFunction();
238  Type *RetTy =
239  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
240  if (!isTypeLegal(RetTy, RetVT))
241  return false;
242 
243  if (RetVT != MVT::i32 && RetVT != MVT::i64)
244  return false;
245 
246  X86::CondCode TmpCC;
247  switch (II->getIntrinsicID()) {
248  default: return false;
249  case Intrinsic::sadd_with_overflow:
250  case Intrinsic::ssub_with_overflow:
251  case Intrinsic::smul_with_overflow:
252  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
253  case Intrinsic::uadd_with_overflow:
254  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
255  }
256 
257  // Check if both instructions are in the same basic block.
258  if (II->getParent() != I->getParent())
259  return false;
260 
261  // Make sure nothing is in the way
262  BasicBlock::const_iterator Start(I);
263  BasicBlock::const_iterator End(II);
264  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
265  // We only expect extractvalue instructions between the intrinsic and the
266  // instruction to be selected.
267  if (!isa<ExtractValueInst>(Itr))
268  return false;
269 
270  // Check that the extractvalue operand comes from the intrinsic.
271  const auto *EVI = cast<ExtractValueInst>(Itr);
272  if (EVI->getAggregateOperand() != II)
273  return false;
274  }
275 
276  // Make sure no potentially eflags clobbering phi moves can be inserted in
277  // between.
278  auto HasPhis = [](const BasicBlock *Succ) {
279  return !llvm::empty(Succ->phis());
280  };
281  if (I->isTerminator() && llvm::any_of(successors(I), HasPhis))
282  return false;
283 
284  // Make sure there are no potentially eflags clobbering constant
285  // materializations in between.
286  if (llvm::any_of(I->operands(), [](Value *V) { return isa<Constant>(V); }))
287  return false;
288 
289  CC = TmpCC;
290  return true;
291 }
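// Illustrative sketch of the IR shape this fold is looking for (value names
// are placeholders): the overflow bit of a *.with.overflow intrinsic feeding
// the instruction being selected in the same basic block.
//
//   %res  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %sum  = extractvalue { i32, i1 } %res, 0
//   %obit = extractvalue { i32, i1 } %res, 1
//   br i1 %obit, label %overflow, label %cont
//
// On success CC is set (X86::COND_O here), so the branch can be emitted as a
// JO that reuses EFLAGS from the add instead of materializing %obit.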
292 
293 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
294  EVT evt = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
295  if (evt == MVT::Other || !evt.isSimple())
296  // Unhandled type. Halt "fast" selection and bail.
297  return false;
298 
299  VT = evt.getSimpleVT();
300  // For now, require SSE/SSE2 for performing floating-point operations,
301  // since x87 requires additional work.
302  if (VT == MVT::f64 && !Subtarget->hasSSE2())
303  return false;
304  if (VT == MVT::f32 && !Subtarget->hasSSE1())
305  return false;
306  // Similarly, no f80 support yet.
307  if (VT == MVT::f80)
308  return false;
309  // We only handle legal types. For example, on x86-32 the instruction
310  // selector contains all of the 64-bit instructions from x86-64,
311  // under the assumption that i64 won't be used if the target doesn't
312  // support it.
313  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
314 }
315 
316 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
317 /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
318 /// Return true and the result register by reference if it is possible.
319 bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM,
320  MachineMemOperand *MMO, unsigned &ResultReg,
321  unsigned Alignment) {
322  bool HasSSE1 = Subtarget->hasSSE1();
323  bool HasSSE2 = Subtarget->hasSSE2();
324  bool HasSSE41 = Subtarget->hasSSE41();
325  bool HasAVX = Subtarget->hasAVX();
326  bool HasAVX2 = Subtarget->hasAVX2();
327  bool HasAVX512 = Subtarget->hasAVX512();
328  bool HasVLX = Subtarget->hasVLX();
329  bool IsNonTemporal = MMO && MMO->isNonTemporal();
330 
331  // Treat i1 loads the same as i8 loads. Masking will be done when storing.
332  if (VT == MVT::i1)
333  VT = MVT::i8;
334 
335  // Get opcode and regclass of the output for the given load instruction.
336  unsigned Opc = 0;
337  switch (VT.SimpleTy) {
338  default: return false;
339  case MVT::i8:
340  Opc = X86::MOV8rm;
341  break;
342  case MVT::i16:
343  Opc = X86::MOV16rm;
344  break;
345  case MVT::i32:
346  Opc = X86::MOV32rm;
347  break;
348  case MVT::i64:
349  // Must be in x86-64 mode.
350  Opc = X86::MOV64rm;
351  break;
352  case MVT::f32:
353  Opc = HasAVX512 ? X86::VMOVSSZrm_alt
354  : HasAVX ? X86::VMOVSSrm_alt
355  : HasSSE1 ? X86::MOVSSrm_alt
356  : X86::LD_Fp32m;
357  break;
358  case MVT::f64:
359  Opc = HasAVX512 ? X86::VMOVSDZrm_alt
360  : HasAVX ? X86::VMOVSDrm_alt
361  : HasSSE2 ? X86::MOVSDrm_alt
362  : X86::LD_Fp64m;
363  break;
364  case MVT::f80:
365  // No f80 support yet.
366  return false;
367  case MVT::v4f32:
368  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
369  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
370  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
371  else if (Alignment >= 16)
372  Opc = HasVLX ? X86::VMOVAPSZ128rm :
373  HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
374  else
375  Opc = HasVLX ? X86::VMOVUPSZ128rm :
376  HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
377  break;
378  case MVT::v2f64:
379  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
380  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
381  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
382  else if (Alignment >= 16)
383  Opc = HasVLX ? X86::VMOVAPDZ128rm :
384  HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
385  else
386  Opc = HasVLX ? X86::VMOVUPDZ128rm :
387  HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
388  break;
389  case MVT::v4i32:
390  case MVT::v2i64:
391  case MVT::v8i16:
392  case MVT::v16i8:
393  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
394  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
395  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
396  else if (Alignment >= 16)
397  Opc = HasVLX ? X86::VMOVDQA64Z128rm :
398  HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
399  else
400  Opc = HasVLX ? X86::VMOVDQU64Z128rm :
401  HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
402  break;
403  case MVT::v8f32:
404  assert(HasAVX);
405  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
406  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
407  else if (IsNonTemporal && Alignment >= 16)
408  return false; // Force split for X86::VMOVNTDQArm
409  else if (Alignment >= 32)
410  Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
411  else
412  Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
413  break;
414  case MVT::v4f64:
415  assert(HasAVX);
416  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
417  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
418  else if (IsNonTemporal && Alignment >= 16)
419  return false; // Force split for X86::VMOVNTDQArm
420  else if (Alignment >= 32)
421  Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
422  else
423  Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
424  break;
425  case MVT::v8i32:
426  case MVT::v4i64:
427  case MVT::v16i16:
428  case MVT::v32i8:
429  assert(HasAVX);
430  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
431  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
432  else if (IsNonTemporal && Alignment >= 16)
433  return false; // Force split for X86::VMOVNTDQArm
434  else if (Alignment >= 32)
435  Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
436  else
437  Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
438  break;
439  case MVT::v16f32:
440  assert(HasAVX512);
441  if (IsNonTemporal && Alignment >= 64)
442  Opc = X86::VMOVNTDQAZrm;
443  else
444  Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
445  break;
446  case MVT::v8f64:
447  assert(HasAVX512);
448  if (IsNonTemporal && Alignment >= 64)
449  Opc = X86::VMOVNTDQAZrm;
450  else
451  Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
452  break;
453  case MVT::v8i64:
454  case MVT::v16i32:
455  case MVT::v32i16:
456  case MVT::v64i8:
457  assert(HasAVX512);
458  // Note: There are a lot more choices based on type with AVX-512, but
459  // there's really no advantage when the load isn't masked.
460  if (IsNonTemporal && Alignment >= 64)
461  Opc = X86::VMOVNTDQAZrm;
462  else
463  Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
464  break;
465  }
466 
467  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
468 
469  ResultReg = createResultReg(RC);
470  MachineInstrBuilder MIB =
471  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
472  addFullAddress(MIB, AM);
473  if (MMO)
474  MIB->addMemOperand(*FuncInfo.MF, MMO);
475  return true;
476 }
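// Illustrative example of the opcode choice above: a 16-byte-aligned
// 'load <4 x float>' selects VMOVAPSrm on an AVX target (MOVAPSrm with plain
// SSE1), while an unaligned access falls back to the corresponding
// VMOVUPSrm/MOVUPSrm form.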
477 
478 /// X86FastEmitStore - Emit a machine instruction to store a value Val of
479 /// type VT. The address is either pre-computed, consisting of a base ptr, Ptr,
480 /// and a displacement offset, or a GlobalAddress,
481 /// i.e. V. Return true if it is possible.
482 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM,
483  MachineMemOperand *MMO, bool Aligned) {
484  bool HasSSE1 = Subtarget->hasSSE1();
485  bool HasSSE2 = Subtarget->hasSSE2();
486  bool HasSSE4A = Subtarget->hasSSE4A();
487  bool HasAVX = Subtarget->hasAVX();
488  bool HasAVX512 = Subtarget->hasAVX512();
489  bool HasVLX = Subtarget->hasVLX();
490  bool IsNonTemporal = MMO && MMO->isNonTemporal();
491 
492  // Get opcode and regclass of the output for the given store instruction.
493  unsigned Opc = 0;
494  switch (VT.getSimpleVT().SimpleTy) {
495  case MVT::f80: // No f80 support yet.
496  default: return false;
497  case MVT::i1: {
498  // Mask out all but lowest bit.
499  Register AndResult = createResultReg(&X86::GR8RegClass);
500  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
501  TII.get(X86::AND8ri), AndResult)
502  .addReg(ValReg).addImm(1);
503  ValReg = AndResult;
504  LLVM_FALLTHROUGH; // handle i1 as i8.
505  }
506  case MVT::i8: Opc = X86::MOV8mr; break;
507  case MVT::i16: Opc = X86::MOV16mr; break;
508  case MVT::i32:
509  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
510  break;
511  case MVT::i64:
512  // Must be in x86-64 mode.
513  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
514  break;
515  case MVT::f32:
516  if (HasSSE1) {
517  if (IsNonTemporal && HasSSE4A)
518  Opc = X86::MOVNTSS;
519  else
520  Opc = HasAVX512 ? X86::VMOVSSZmr :
521  HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
522  } else
523  Opc = X86::ST_Fp32m;
524  break;
525  case MVT::f64:
526  if (HasSSE2) {
527  if (IsNonTemporal && HasSSE4A)
528  Opc = X86::MOVNTSD;
529  else
530  Opc = HasAVX512 ? X86::VMOVSDZmr :
531  HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
532  } else
533  Opc = X86::ST_Fp64m;
534  break;
535  case MVT::x86mmx:
536  Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
537  break;
538  case MVT::v4f32:
539  if (Aligned) {
540  if (IsNonTemporal)
541  Opc = HasVLX ? X86::VMOVNTPSZ128mr :
542  HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
543  else
544  Opc = HasVLX ? X86::VMOVAPSZ128mr :
545  HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
546  } else
547  Opc = HasVLX ? X86::VMOVUPSZ128mr :
548  HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
549  break;
550  case MVT::v2f64:
551  if (Aligned) {
552  if (IsNonTemporal)
553  Opc = HasVLX ? X86::VMOVNTPDZ128mr :
554  HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
555  else
556  Opc = HasVLX ? X86::VMOVAPDZ128mr :
557  HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
558  } else
559  Opc = HasVLX ? X86::VMOVUPDZ128mr :
560  HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
561  break;
562  case MVT::v4i32:
563  case MVT::v2i64:
564  case MVT::v8i16:
565  case MVT::v16i8:
566  if (Aligned) {
567  if (IsNonTemporal)
568  Opc = HasVLX ? X86::VMOVNTDQZ128mr :
569  HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
570  else
571  Opc = HasVLX ? X86::VMOVDQA64Z128mr :
572  HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
573  } else
574  Opc = HasVLX ? X86::VMOVDQU64Z128mr :
575  HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
576  break;
577  case MVT::v8f32:
578  assert(HasAVX);
579  if (Aligned) {
580  if (IsNonTemporal)
581  Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
582  else
583  Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
584  } else
585  Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
586  break;
587  case MVT::v4f64:
588  assert(HasAVX);
589  if (Aligned) {
590  if (IsNonTemporal)
591  Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
592  else
593  Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
594  } else
595  Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
596  break;
597  case MVT::v8i32:
598  case MVT::v4i64:
599  case MVT::v16i16:
600  case MVT::v32i8:
601  assert(HasAVX);
602  if (Aligned) {
603  if (IsNonTemporal)
604  Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
605  else
606  Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
607  } else
608  Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
609  break;
610  case MVT::v16f32:
611  assert(HasAVX512);
612  if (Aligned)
613  Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
614  else
615  Opc = X86::VMOVUPSZmr;
616  break;
617  case MVT::v8f64:
618  assert(HasAVX512);
619  if (Aligned) {
620  Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
621  } else
622  Opc = X86::VMOVUPDZmr;
623  break;
624  case MVT::v8i64:
625  case MVT::v16i32:
626  case MVT::v32i16:
627  case MVT::v64i8:
628  assert(HasAVX512);
629  // Note: There are a lot more choices based on type with AVX-512, but
630  // there's really no advantage when the store isn't masked.
631  if (Aligned)
632  Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
633  else
634  Opc = X86::VMOVDQU64Zmr;
635  break;
636  }
637 
638  const MCInstrDesc &Desc = TII.get(Opc);
639  // Some of the instructions in the previous switch use FR128 instead
640  // of FR32 for ValReg. Make sure the register we feed the instruction
641  // matches its register class constraints.
642  // Note: It is fine to do a copy from FR32 to FR128; these are the
643  // same registers behind the scenes, which is why it did not trigger
644  // any bugs before.
645  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
646  MachineInstrBuilder MIB =
647  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
648  addFullAddress(MIB, AM).addReg(ValReg);
649  if (MMO)
650  MIB->addMemOperand(*FuncInfo.MF, MMO);
651 
652  return true;
653 }
654 
655 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
656  X86AddressMode &AM,
657  MachineMemOperand *MMO, bool Aligned) {
658  // Handle 'null' like i32/i64 0.
659  if (isa<ConstantPointerNull>(Val))
660  Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
661 
662  // If this is a store of a simple constant, fold the constant into the store.
663  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
664  unsigned Opc = 0;
665  bool Signed = true;
666  switch (VT.getSimpleVT().SimpleTy) {
667  default: break;
668  case MVT::i1:
669  Signed = false;
670  LLVM_FALLTHROUGH; // Handle as i8.
671  case MVT::i8: Opc = X86::MOV8mi; break;
672  case MVT::i16: Opc = X86::MOV16mi; break;
673  case MVT::i32: Opc = X86::MOV32mi; break;
674  case MVT::i64:
675  // Must be a 32-bit sign extended value.
676  if (isInt<32>(CI->getSExtValue()))
677  Opc = X86::MOV64mi32;
678  break;
679  }
680 
681  if (Opc) {
682  MachineInstrBuilder MIB =
683  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
684  addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
685  : CI->getZExtValue());
686  if (MMO)
687  MIB->addMemOperand(*FuncInfo.MF, MMO);
688  return true;
689  }
690  }
691 
692  Register ValReg = getRegForValue(Val);
693  if (ValReg == 0)
694  return false;
695 
696  return X86FastEmitStore(VT, ValReg, AM, MMO, Aligned);
697 }
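// Illustrative example of the constant path above: 'store i32 0, i32* %p' is
// emitted directly as MOV32mi (movl $0, (%reg)), without first materializing
// the zero in a register.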
698 
699 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
700 /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
701 /// ISD::SIGN_EXTEND).
702 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
703  unsigned Src, EVT SrcVT,
704  unsigned &ResultReg) {
705  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
706  if (RR == 0)
707  return false;
708 
709  ResultReg = RR;
710  return true;
711 }
712 
713 bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
714  // Handle constant address.
715  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
716  // Can't handle alternate code models yet.
717  if (TM.getCodeModel() != CodeModel::Small)
718  return false;
719 
720  // Can't handle TLS yet.
721  if (GV->isThreadLocal())
722  return false;
723 
724  // Can't handle !absolute_symbol references yet.
725  if (GV->isAbsoluteSymbolRef())
726  return false;
727 
728  // RIP-relative addresses can't have additional register operands, so if
729  // we've already folded stuff into the addressing mode, just force the
730  // global value into its own register, which we can use as the basereg.
731  if (!Subtarget->isPICStyleRIPRel() ||
732  (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
733  // Okay, we've committed to selecting this global. Set up the address.
734  AM.GV = GV;
735 
736  // Allow the subtarget to classify the global.
737  unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
738 
739  // If this reference is relative to the pic base, set it now.
740  if (isGlobalRelativeToPICBase(GVFlags)) {
741  // FIXME: How do we know Base.Reg is free??
742  AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
743  }
744 
745  // Unless the ABI requires an extra load, return a direct reference to
746  // the global.
747  if (!isGlobalStubReference(GVFlags)) {
748  if (Subtarget->isPICStyleRIPRel()) {
749  // Use rip-relative addressing if we can. Above we verified that the
750  // base and index registers are unused.
751  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
752  AM.Base.Reg = X86::RIP;
753  }
754  AM.GVOpFlags = GVFlags;
755  return true;
756  }
757 
758  // Ok, we need to do a load from a stub. If we've already loaded from
759  // this stub, reuse the loaded pointer, otherwise emit the load now.
760  DenseMap<const Value *, Register>::iterator I = LocalValueMap.find(V);
761  Register LoadReg;
762  if (I != LocalValueMap.end() && I->second) {
763  LoadReg = I->second;
764  } else {
765  // Issue load from stub.
766  unsigned Opc = 0;
767  const TargetRegisterClass *RC = nullptr;
768  X86AddressMode StubAM;
769  StubAM.Base.Reg = AM.Base.Reg;
770  StubAM.GV = GV;
771  StubAM.GVOpFlags = GVFlags;
772 
773  // Prepare for inserting code in the local-value area.
774  SavePoint SaveInsertPt = enterLocalValueArea();
775 
776  if (TLI.getPointerTy(DL) == MVT::i64) {
777  Opc = X86::MOV64rm;
778  RC = &X86::GR64RegClass;
779  } else {
780  Opc = X86::MOV32rm;
781  RC = &X86::GR32RegClass;
782  }
783 
784  if (Subtarget->isPICStyleRIPRel() || GVFlags == X86II::MO_GOTPCREL ||
785  GVFlags == X86II::MO_GOTPCREL_NORELAX)
786  StubAM.Base.Reg = X86::RIP;
787 
788  LoadReg = createResultReg(RC);
789  MachineInstrBuilder LoadMI =
790  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
791  addFullAddress(LoadMI, StubAM);
792 
793  // Ok, back to normal mode.
794  leaveLocalValueArea(SaveInsertPt);
795 
796  // Prevent loading GV stub multiple times in same MBB.
797  LocalValueMap[V] = LoadReg;
798  }
799 
800  // Now construct the final address. Note that the Disp, Scale,
801  // and Index values may already be set here.
802  AM.Base.Reg = LoadReg;
803  AM.GV = nullptr;
804  return true;
805  }
806  }
807 
808  // If all else fails, try to materialize the value in a register.
809  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
810  if (AM.Base.Reg == 0) {
811  AM.Base.Reg = getRegForValue(V);
812  return AM.Base.Reg != 0;
813  }
814  if (AM.IndexReg == 0) {
815  assert(AM.Scale == 1 && "Scale with no index!");
816  AM.IndexReg = getRegForValue(V);
817  return AM.IndexReg != 0;
818  }
819  }
820 
821  return false;
822 }
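// Illustrative sketch of the stub path above on x86-64 PIC ('foo' stands for
// the referenced global): a global classified as MO_GOTPCREL gets a single
// load in the local-value area, roughly
//
//   movq foo@GOTPCREL(%rip), %reg    ; MOV64rm with StubAM.Base.Reg == RIP
//
// and %reg then becomes AM.Base.Reg for the actual memory access.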
823 
824 /// X86SelectAddress - Attempt to fill in an address from the given value.
825 ///
826 bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
827  SmallVector<const Value *, 32> GEPs;
828 redo_gep:
829  const User *U = nullptr;
830  unsigned Opcode = Instruction::UserOp1;
831  if (const Instruction *I = dyn_cast<Instruction>(V)) {
832  // Don't walk into other basic blocks; it's possible we haven't
833  // visited them yet, so the instructions may not yet be assigned
834  // virtual registers.
835  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
836  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
837  Opcode = I->getOpcode();
838  U = I;
839  }
840  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
841  Opcode = C->getOpcode();
842  U = C;
843  }
844 
845  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
846  if (Ty->getAddressSpace() > 255)
847  // Fast instruction selection doesn't support the special
848  // address spaces.
849  return false;
850 
851  switch (Opcode) {
852  default: break;
853  case Instruction::BitCast:
854  // Look past bitcasts.
855  return X86SelectAddress(U->getOperand(0), AM);
856 
857  case Instruction::IntToPtr:
858  // Look past no-op inttoptrs.
859  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
860  TLI.getPointerTy(DL))
861  return X86SelectAddress(U->getOperand(0), AM);
862  break;
863 
864  case Instruction::PtrToInt:
865  // Look past no-op ptrtoints.
866  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
867  return X86SelectAddress(U->getOperand(0), AM);
868  break;
869 
870  case Instruction::Alloca: {
871  // Do static allocas.
872  const AllocaInst *A = cast<AllocaInst>(V);
873  DenseMap<const AllocaInst *, int>::iterator SI =
874  FuncInfo.StaticAllocaMap.find(A);
875  if (SI != FuncInfo.StaticAllocaMap.end()) {
876  AM.BaseType = X86AddressMode::FrameIndexBase;
877  AM.Base.FrameIndex = SI->second;
878  return true;
879  }
880  break;
881  }
882 
883  case Instruction::Add: {
884  // Adds of constants are common and easy enough.
885  if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
886  uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
887  // They have to fit in the 32-bit signed displacement field though.
888  if (isInt<32>(Disp)) {
889  AM.Disp = (uint32_t)Disp;
890  return X86SelectAddress(U->getOperand(0), AM);
891  }
892  }
893  break;
894  }
895 
896  case Instruction::GetElementPtr: {
897  X86AddressMode SavedAM = AM;
898 
899  // Pattern-match simple GEPs.
900  uint64_t Disp = (int32_t)AM.Disp;
901  unsigned IndexReg = AM.IndexReg;
902  unsigned Scale = AM.Scale;
903  gep_type_iterator GTI = gep_type_begin(U);
904  // Iterate through the indices, folding what we can. Constants can be
905  // folded, and one dynamic index can be handled, if the scale is supported.
906  for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
907  i != e; ++i, ++GTI) {
908  const Value *Op = *i;
909  if (StructType *STy = GTI.getStructTypeOrNull()) {
910  const StructLayout *SL = DL.getStructLayout(STy);
911  Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
912  continue;
913  }
914 
915  // An array/variable index is always of the form i*S where S is the
916  // constant scale size. See if we can push the scale into immediates.
917  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
918  for (;;) {
919  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
920  // Constant-offset addressing.
921  Disp += CI->getSExtValue() * S;
922  break;
923  }
924  if (canFoldAddIntoGEP(U, Op)) {
925  // A compatible add with a constant operand. Fold the constant.
926  ConstantInt *CI =
927  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
928  Disp += CI->getSExtValue() * S;
929  // Iterate on the other operand.
930  Op = cast<AddOperator>(Op)->getOperand(0);
931  continue;
932  }
933  if (IndexReg == 0 &&
934  (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
935  (S == 1 || S == 2 || S == 4 || S == 8)) {
936  // Scaled-index addressing.
937  Scale = S;
938  IndexReg = getRegForGEPIndex(Op);
939  if (IndexReg == 0)
940  return false;
941  break;
942  }
943  // Unsupported.
944  goto unsupported_gep;
945  }
946  }
947 
948  // Check for displacement overflow.
949  if (!isInt<32>(Disp))
950  break;
951 
952  AM.IndexReg = IndexReg;
953  AM.Scale = Scale;
954  AM.Disp = (uint32_t)Disp;
955  GEPs.push_back(V);
956 
957  if (const GetElementPtrInst *GEP =
958  dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
959  // Ok, the GEP indices were covered by constant-offset and scaled-index
960  // addressing. Update the address state and move on to examining the base.
961  V = GEP;
962  goto redo_gep;
963  } else if (X86SelectAddress(U->getOperand(0), AM)) {
964  return true;
965  }
966 
967  // If we couldn't merge the gep value into this addr mode, revert back to
968  // our address and just match the value instead of completely failing.
969  AM = SavedAM;
970 
971  for (const Value *I : reverse(GEPs))
972  if (handleConstantAddresses(I, AM))
973  return true;
974 
975  return false;
976  unsupported_gep:
977  // Ok, the GEP indices weren't all covered.
978  break;
979  }
980  }
981 
982  return handleConstantAddresses(V, AM);
983 }
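// Illustrative example: for 'getelementptr inbounds i32, i32* %p, i64 %i' the
// GEP case above folds everything into a single addressing mode, ending up
// with Base = vreg(%p), IndexReg = vreg(%i), Scale = 4 and Disp = 0, i.e. the
// operand of one [base + 4*index] memory access.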
984 
985 /// X86SelectCallAddress - Attempt to fill in an address from the given value.
986 ///
987 bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
988  const User *U = nullptr;
989  unsigned Opcode = Instruction::UserOp1;
990  const Instruction *I = dyn_cast<Instruction>(V);
991  // Record if the value is defined in the same basic block.
992  //
993  // This information is crucial to know whether or not folding an
994  // operand is valid.
995  // Indeed, FastISel generates or reuses a virtual register for all
996  // operands of all instructions it selects. Obviously, the definition and
997  // its uses must use the same virtual register otherwise the produced
998  // code is incorrect.
999  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
1000  // registers for values that are alive across basic blocks. This ensures
1001  // that the values are consistently set across basic blocks, even
1002  // if different instruction selection mechanisms are used (e.g., a mix of
1003  // SDISel and FastISel).
1004  // For values local to a basic block, the instruction selection process
1005  // generates these virtual registers with whatever method is appropriate
1006  // for its needs. In particular, FastISel and SDISel do not share the way
1007  // local virtual registers are set.
1008  // Therefore, it is impossible (or at least unsafe) to share values
1009  // between basic blocks unless they use the same instruction selection
1010  // method, which is not guaranteed for X86.
1011  // Moreover, things like hasOneUse could not be used accurately if we
1012  // allowed referencing values across basic blocks when they are not
1013  // alive across basic blocks in the first place.
1014  bool InMBB = true;
1015  if (I) {
1016  Opcode = I->getOpcode();
1017  U = I;
1018  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
1019  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
1020  Opcode = C->getOpcode();
1021  U = C;
1022  }
1023 
1024  switch (Opcode) {
1025  default: break;
1026  case Instruction::BitCast:
1027  // Look past bitcasts if its operand is in the same BB.
1028  if (InMBB)
1029  return X86SelectCallAddress(U->getOperand(0), AM);
1030  break;
1031 
1032  case Instruction::IntToPtr:
1033  // Look past no-op inttoptrs if its operand is in the same BB.
1034  if (InMBB &&
1035  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
1036  TLI.getPointerTy(DL))
1037  return X86SelectCallAddress(U->getOperand(0), AM);
1038  break;
1039 
1040  case Instruction::PtrToInt:
1041  // Look past no-op ptrtoints if its operand is in the same BB.
1042  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
1043  return X86SelectCallAddress(U->getOperand(0), AM);
1044  break;
1045  }
1046 
1047  // Handle constant address.
1048  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
1049  // Can't handle alternate code models yet.
1050  if (TM.getCodeModel() != CodeModel::Small)
1051  return false;
1052 
1053  // RIP-relative addresses can't have additional register operands.
1054  if (Subtarget->isPICStyleRIPRel() &&
1055  (AM.Base.Reg != 0 || AM.IndexReg != 0))
1056  return false;
1057 
1058  // Can't handle TLS.
1059  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
1060  if (GVar->isThreadLocal())
1061  return false;
1062 
1063  // Okay, we've committed to selecting this global. Set up the basic address.
1064  AM.GV = GV;
1065 
1066  // Return a direct reference to the global. Fastisel can handle calls to
1067  // functions that require loads, such as dllimport and nonlazybind
1068  // functions.
1069  if (Subtarget->isPICStyleRIPRel()) {
1070  // Use rip-relative addressing if we can. Above we verified that the
1071  // base and index registers are unused.
1072  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
1073  AM.Base.Reg = X86::RIP;
1074  } else {
1075  AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
1076  }
1077 
1078  return true;
1079  }
1080 
1081  // If all else fails, try to materialize the value in a register.
1082  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
1083  auto GetCallRegForValue = [this](const Value *V) {
1084  Register Reg = getRegForValue(V);
1085 
1086  // In 64-bit mode, we need a 64-bit register even if pointers are 32 bits.
1087  if (Reg && Subtarget->isTarget64BitILP32()) {
1088  Register CopyReg = createResultReg(&X86::GR32RegClass);
1089  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32rr),
1090  CopyReg)
1091  .addReg(Reg);
1092 
1093  Register ExtReg = createResultReg(&X86::GR64RegClass);
1094  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1095  TII.get(TargetOpcode::SUBREG_TO_REG), ExtReg)
1096  .addImm(0)
1097  .addReg(CopyReg)
1098  .addImm(X86::sub_32bit);
1099  Reg = ExtReg;
1100  }
1101 
1102  return Reg;
1103  };
1104 
1105  if (AM.Base.Reg == 0) {
1106  AM.Base.Reg = GetCallRegForValue(V);
1107  return AM.Base.Reg != 0;
1108  }
1109  if (AM.IndexReg == 0) {
1110  assert(AM.Scale == 1 && "Scale with no index!");
1111  AM.IndexReg = GetCallRegForValue(V);
1112  return AM.IndexReg != 0;
1113  }
1114  }
1115 
1116  return false;
1117 }
1118 
1119 
1120 /// X86SelectStore - Select and emit code to implement store instructions.
1121 bool X86FastISel::X86SelectStore(const Instruction *I) {
1122  // Atomic stores need special handling.
1123  const StoreInst *S = cast<StoreInst>(I);
1124 
1125  if (S->isAtomic())
1126  return false;
1127 
1128  const Value *PtrV = I->getOperand(1);
1129  if (TLI.supportSwiftError()) {
1130  // Swifterror values can come from either a function parameter with
1131  // swifterror attribute or an alloca with swifterror attribute.
1132  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
1133  if (Arg->hasSwiftErrorAttr())
1134  return false;
1135  }
1136 
1137  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
1138  if (Alloca->isSwiftError())
1139  return false;
1140  }
1141  }
1142 
1143  const Value *Val = S->getValueOperand();
1144  const Value *Ptr = S->getPointerOperand();
1145 
1146  MVT VT;
1147  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
1148  return false;
1149 
1150  Align Alignment = S->getAlign();
1151  Align ABIAlignment = DL.getABITypeAlign(Val->getType());
1152  bool Aligned = Alignment >= ABIAlignment;
1153 
1154  X86AddressMode AM;
1155  if (!X86SelectAddress(Ptr, AM))
1156  return false;
1157 
1158  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
1159 }
1160 
1161 /// X86SelectRet - Select and emit code to implement ret instructions.
1162 bool X86FastISel::X86SelectRet(const Instruction *I) {
1163  const ReturnInst *Ret = cast<ReturnInst>(I);
1164  const Function &F = *I->getParent()->getParent();
1165  const X86MachineFunctionInfo *X86MFInfo =
1166  FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
1167 
1168  if (!FuncInfo.CanLowerReturn)
1169  return false;
1170 
1171  if (TLI.supportSwiftError() &&
1172  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
1173  return false;
1174 
1175  if (TLI.supportSplitCSR(FuncInfo.MF))
1176  return false;
1177 
1178  CallingConv::ID CC = F.getCallingConv();
1179  if (CC != CallingConv::C &&
1180  CC != CallingConv::Fast &&
1181  CC != CallingConv::Tail &&
1182  CC != CallingConv::SwiftTail &&
1183  CC != CallingConv::X86_FastCall &&
1184  CC != CallingConv::X86_StdCall &&
1185  CC != CallingConv::X86_ThisCall &&
1186  CC != CallingConv::X86_64_SysV &&
1187  CC != CallingConv::Win64)
1188  return false;
1189 
1190  // Don't handle popping bytes if they don't fit the ret's immediate.
1191  if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
1192  return false;
1193 
1194  // fastcc with -tailcallopt is intended to provide a guaranteed
1195  // tail call optimization. Fastisel doesn't know how to do that.
1196  if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
1197  CC == CallingConv::Tail || CC == CallingConv::SwiftTail)
1198  return false;
1199 
1200  // Let SDISel handle vararg functions.
1201  if (F.isVarArg())
1202  return false;
1203 
1204  // Build a list of return value registers.
1205  SmallVector<unsigned, 4> RetRegs;
1206 
1207  if (Ret->getNumOperands() > 0) {
1208  SmallVector<ISD::OutputArg, 4> Outs;
1209  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1210 
1211  // Analyze operands of the call, assigning locations to each operand.
1212  SmallVector<CCValAssign, 16> ValLocs;
1213  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
1214  CCInfo.AnalyzeReturn(Outs, RetCC_X86);
1215 
1216  const Value *RV = Ret->getOperand(0);
1217  Register Reg = getRegForValue(RV);
1218  if (Reg == 0)
1219  return false;
1220 
1221  // Only handle a single return value for now.
1222  if (ValLocs.size() != 1)
1223  return false;
1224 
1225  CCValAssign &VA = ValLocs[0];
1226 
1227  // Don't bother handling odd stuff for now.
1228  if (VA.getLocInfo() != CCValAssign::Full)
1229  return false;
1230  // Only handle register returns for now.
1231  if (!VA.isRegLoc())
1232  return false;
1233 
1234  // The calling-convention tables for x87 returns don't tell
1235  // the whole story.
1236  if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
1237  return false;
1238 
1239  unsigned SrcReg = Reg + VA.getValNo();
1240  EVT SrcVT = TLI.getValueType(DL, RV->getType());
1241  EVT DstVT = VA.getValVT();
1242  // Special handling for extended integers.
1243  if (SrcVT != DstVT) {
1244  if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
1245  return false;
1246 
1247  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
1248  return false;
1249 
1250  assert(DstVT == MVT::i32 && "X86 should always ext to i32");
1251 
1252  if (SrcVT == MVT::i1) {
1253  if (Outs[0].Flags.isSExt())
1254  return false;
1255  // TODO
1256  SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg);
1257  SrcVT = MVT::i8;
1258  }
1259  unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
1260  ISD::SIGN_EXTEND;
1261  // TODO
1262  SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op, SrcReg);
1263  }
1264 
1265  // Make the copy.
1266  Register DstReg = VA.getLocReg();
1267  const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
1268  // Avoid a cross-class copy. This is very unlikely.
1269  if (!SrcRC->contains(DstReg))
1270  return false;
1271  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1272  TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
1273 
1274  // Add register to return instruction.
1275  RetRegs.push_back(VA.getLocReg());
1276  }
1277 
1278  // Swift calling convention does not require we copy the sret argument
1279  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
1280 
1281  // All x86 ABIs require that for returning structs by value we copy
1282  // the sret argument into %rax/%eax (depending on ABI) for the return.
1283  // We saved the argument into a virtual register in the entry block,
1284  // so now we copy the value out and into %rax/%eax.
1285  if (F.hasStructRetAttr() && CC != CallingConv::Swift &&
1286  CC != CallingConv::SwiftTail) {
1287  Register Reg = X86MFInfo->getSRetReturnReg();
1288  assert(Reg &&
1289  "SRetReturnReg should have been set in LowerFormalArguments()!");
1290  unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
1291  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1292  TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
1293  RetRegs.push_back(RetReg);
1294  }
1295 
1296  // Now emit the RET.
1297  MachineInstrBuilder MIB;
1298  if (X86MFInfo->getBytesToPopOnReturn()) {
1299  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1300  TII.get(Subtarget->is64Bit() ? X86::RETI64 : X86::RETI32))
1301  .addImm(X86MFInfo->getBytesToPopOnReturn());
1302  } else {
1303  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1304  TII.get(Subtarget->is64Bit() ? X86::RET64 : X86::RET32));
1305  }
1306  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1307  MIB.addReg(RetRegs[i], RegState::Implicit);
1308  return true;
1309 }
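// Illustrative example: for 'ret i32 %x' in a C-calling-convention function,
// RetCC_X86 assigns the value to EAX, so the code above emits a COPY of the
// vreg into EAX followed by RET64 (RET32 on 32-bit targets) with EAX attached
// as an implicit use.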
1310 
1311 /// X86SelectLoad - Select and emit code to implement load instructions.
1312 ///
1313 bool X86FastISel::X86SelectLoad(const Instruction *I) {
1314  const LoadInst *LI = cast<LoadInst>(I);
1315 
1316  // Atomic loads need special handling.
1317  if (LI->isAtomic())
1318  return false;
1319 
1320  const Value *SV = I->getOperand(0);
1321  if (TLI.supportSwiftError()) {
1322  // Swifterror values can come from either a function parameter with
1323  // swifterror attribute or an alloca with swifterror attribute.
1324  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1325  if (Arg->hasSwiftErrorAttr())
1326  return false;
1327  }
1328 
1329  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1330  if (Alloca->isSwiftError())
1331  return false;
1332  }
1333  }
1334 
1335  MVT VT;
1336  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
1337  return false;
1338 
1339  const Value *Ptr = LI->getPointerOperand();
1340 
1341  X86AddressMode AM;
1342  if (!X86SelectAddress(Ptr, AM))
1343  return false;
1344 
1345  unsigned ResultReg = 0;
1346  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
1347  LI->getAlign().value()))
1348  return false;
1349 
1350  updateValueMap(I, ResultReg);
1351  return true;
1352 }
1353 
1354 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
1355  bool HasAVX512 = Subtarget->hasAVX512();
1356  bool HasAVX = Subtarget->hasAVX();
1357  bool HasSSE1 = Subtarget->hasSSE1();
1358  bool HasSSE2 = Subtarget->hasSSE2();
1359 
1360  switch (VT.getSimpleVT().SimpleTy) {
1361  default: return 0;
1362  case MVT::i8: return X86::CMP8rr;
1363  case MVT::i16: return X86::CMP16rr;
1364  case MVT::i32: return X86::CMP32rr;
1365  case MVT::i64: return X86::CMP64rr;
1366  case MVT::f32:
1367  return HasAVX512 ? X86::VUCOMISSZrr
1368  : HasAVX ? X86::VUCOMISSrr
1369  : HasSSE1 ? X86::UCOMISSrr
1370  : 0;
1371  case MVT::f64:
1372  return HasAVX512 ? X86::VUCOMISDZrr
1373  : HasAVX ? X86::VUCOMISDrr
1374  : HasSSE2 ? X86::UCOMISDrr
1375  : 0;
1376  }
1377 }
1378 
1379 /// If the RHS of the comparison is a constant that can be folded into the
1380 /// compare, return an opcode that works for it (e.g. CMP32ri); otherwise return 0.
1381 static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
1382  int64_t Val = RHSC->getSExtValue();
1383  switch (VT.getSimpleVT().SimpleTy) {
1384  // Otherwise, we can't fold the immediate into this comparison.
1385  default:
1386  return 0;
1387  case MVT::i8:
1388  return X86::CMP8ri;
1389  case MVT::i16:
1390  if (isInt<8>(Val))
1391  return X86::CMP16ri8;
1392  return X86::CMP16ri;
1393  case MVT::i32:
1394  if (isInt<8>(Val))
1395  return X86::CMP32ri8;
1396  return X86::CMP32ri;
1397  case MVT::i64:
1398  if (isInt<8>(Val))
1399  return X86::CMP64ri8;
1400  // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
1401  // field.
1402  if (isInt<32>(Val))
1403  return X86::CMP64ri32;
1404  return 0;
1405  }
1406 }
1407 
1408 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
1409  const DebugLoc &CurDbgLoc) {
1410  Register Op0Reg = getRegForValue(Op0);
1411  if (Op0Reg == 0) return false;
1412 
1413  // Handle 'null' like i32/i64 0.
1414  if (isa<ConstantPointerNull>(Op1))
1415  Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
1416 
1417  // We have two options: compare with register or immediate. If the RHS of
1418  // the compare is an immediate that we can fold into this compare, use
1419  // CMPri, otherwise use CMPrr.
1420  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1421  if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
1422  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
1423  .addReg(Op0Reg)
1424  .addImm(Op1C->getSExtValue());
1425  return true;
1426  }
1427  }
1428 
1429  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
1430  if (CompareOpc == 0) return false;
1431 
1432  Register Op1Reg = getRegForValue(Op1);
1433  if (Op1Reg == 0) return false;
1434  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
1435  .addReg(Op0Reg)
1436  .addReg(Op1Reg);
1437 
1438  return true;
1439 }
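// Illustrative example: for 'icmp slt i32 %x, 42' the constant fits in an
// imm8, so X86ChooseCmpImmediateOpcode returns CMP32ri8 and a single
// 'cmpl $42, %reg' is emitted, leaving the result in EFLAGS for the caller.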
1440 
1441 bool X86FastISel::X86SelectCmp(const Instruction *I) {
1442  const CmpInst *CI = cast<CmpInst>(I);
1443 
1444  MVT VT;
1445  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
1446  return false;
1447 
1448  // Below code only works for scalars.
1449  if (VT.isVector())
1450  return false;
1451 
1452  // Try to optimize or fold the cmp.
1453  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1454  unsigned ResultReg = 0;
1455  switch (Predicate) {
1456  default: break;
1457  case CmpInst::FCMP_FALSE: {
1458  ResultReg = createResultReg(&X86::GR32RegClass);
1459  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
1460  ResultReg);
1461  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, X86::sub_8bit);
1462  if (!ResultReg)
1463  return false;
1464  break;
1465  }
1466  case CmpInst::FCMP_TRUE: {
1467  ResultReg = createResultReg(&X86::GR8RegClass);
1468  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
1469  ResultReg).addImm(1);
1470  break;
1471  }
1472  }
1473 
1474  if (ResultReg) {
1475  updateValueMap(I, ResultReg);
1476  return true;
1477  }
1478 
1479  const Value *LHS = CI->getOperand(0);
1480  const Value *RHS = CI->getOperand(1);
1481 
1482  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1483  // We don't have to materialize a zero constant for this case and can just use
1484  // %x again on the RHS.
1485  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1486  const auto *RHSC = dyn_cast<ConstantFP>(RHS);
1487  if (RHSC && RHSC->isNullValue())
1488  RHS = LHS;
1489  }
1490 
1491  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
1492  static const uint16_t SETFOpcTable[2][3] = {
1493  { X86::COND_E, X86::COND_NP, X86::AND8rr },
1494  { X86::COND_NE, X86::COND_P, X86::OR8rr }
1495  };
1496  const uint16_t *SETFOpc = nullptr;
1497  switch (Predicate) {
1498  default: break;
1499  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
1500  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
1501  }
1502 
1503  ResultReg = createResultReg(&X86::GR8RegClass);
1504  if (SETFOpc) {
1505  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1506  return false;
1507 
1508  Register FlagReg1 = createResultReg(&X86::GR8RegClass);
1509  Register FlagReg2 = createResultReg(&X86::GR8RegClass);
1510  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1511  FlagReg1).addImm(SETFOpc[0]);
1512  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1513  FlagReg2).addImm(SETFOpc[1]);
1514  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
1515  ResultReg).addReg(FlagReg1).addReg(FlagReg2);
1516  updateValueMap(I, ResultReg);
1517  return true;
1518  }
1519 
1520  X86::CondCode CC;
1521  bool SwapArgs;
1522  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1523  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1524 
1525  if (SwapArgs)
1526  std::swap(LHS, RHS);
1527 
1528  // Emit a compare of LHS/RHS.
1529  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1530  return false;
1531 
1532  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1533  ResultReg).addImm(CC);
1534  updateValueMap(I, ResultReg);
1535  return true;
1536 }
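// Illustrative example of the FCMP_OEQ special case above: equality of two
// doubles cannot be tested with one flag, so the emitted sequence is roughly
//
//   ucomisd %xmm1, %xmm0
//   sete    %cl
//   setnp   %al
//   andb    %cl, %al
//
// matching the {COND_E, COND_NP, AND8rr} row of SETFOpcTable.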
1537 
1538 bool X86FastISel::X86SelectZExt(const Instruction *I) {
1539  EVT DstVT = TLI.getValueType(DL, I->getType());
1540  if (!TLI.isTypeLegal(DstVT))
1541  return false;
1542 
1543  Register ResultReg = getRegForValue(I->getOperand(0));
1544  if (ResultReg == 0)
1545  return false;
1546 
1547  // Handle zero-extension from i1 to i8, which is common.
1548  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1549  if (SrcVT == MVT::i1) {
1550  // Set the high bits to zero.
1551  ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg);
1552  SrcVT = MVT::i8;
1553 
1554  if (ResultReg == 0)
1555  return false;
1556  }
1557 
1558  if (DstVT == MVT::i64) {
1559  // Handle extension to 64-bits via sub-register shenanigans.
1560  unsigned MovInst;
1561 
1562  switch (SrcVT.SimpleTy) {
1563  case MVT::i8: MovInst = X86::MOVZX32rr8; break;
1564  case MVT::i16: MovInst = X86::MOVZX32rr16; break;
1565  case MVT::i32: MovInst = X86::MOV32rr; break;
1566  default: llvm_unreachable("Unexpected zext to i64 source type");
1567  }
1568 
1569  Register Result32 = createResultReg(&X86::GR32RegClass);
1570  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
1571  .addReg(ResultReg);
1572 
1573  ResultReg = createResultReg(&X86::GR64RegClass);
1574  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
1575  ResultReg)
1576  .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
1577  } else if (DstVT == MVT::i16) {
1578  // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
1579  // extend to 32-bits and then extract down to 16-bits.
1580  Register Result32 = createResultReg(&X86::GR32RegClass);
1581  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
1582  Result32).addReg(ResultReg);
1583 
1584  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit);
1585  } else if (DstVT != MVT::i8) {
1586  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1587  ResultReg);
1588  if (ResultReg == 0)
1589  return false;
1590  }
1591 
1592  updateValueMap(I, ResultReg);
1593  return true;
1594 }
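// Illustrative example: 'zext i8 %c to i64' takes the DstVT == MVT::i64 path
// above and becomes roughly
//
//   movzbl %cl, %eax                          ; MOVZX32rr8
//   SUBREG_TO_REG 0, %eax, sub_32bit -> %rax
//
// relying on the upper 32 bits of the 64-bit register being implicitly zeroed.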
1595 
1596 bool X86FastISel::X86SelectSExt(const Instruction *I) {
1597  EVT DstVT = TLI.getValueType(DL, I->getType());
1598  if (!TLI.isTypeLegal(DstVT))
1599  return false;
1600 
1601  Register ResultReg = getRegForValue(I->getOperand(0));
1602  if (ResultReg == 0)
1603  return false;
1604 
1605  // Handle sign-extension from i1 to i8.
1606  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1607  if (SrcVT == MVT::i1) {
1608  // Set the high bits to zero.
1609  Register ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg);
1610  if (ZExtReg == 0)
1611  return false;
1612 
1613  // Negate the result to make an 8-bit sign extended value.
1614  ResultReg = createResultReg(&X86::GR8RegClass);
1615  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r),
1616  ResultReg).addReg(ZExtReg);
1617 
1618  SrcVT = MVT::i8;
1619  }
1620 
1621  if (DstVT == MVT::i16) {
1622  // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
1623  // extend to 32-bits and then extract down to 16-bits.
1624  Register Result32 = createResultReg(&X86::GR32RegClass);
1625  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
1626  Result32).addReg(ResultReg);
1627 
1628  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit);
1629  } else if (DstVT != MVT::i8) {
1630  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
1631  ResultReg);
1632  if (ResultReg == 0)
1633  return false;
1634  }
1635 
1636  updateValueMap(I, ResultReg);
1637  return true;
1638 }
1639 
1640 bool X86FastISel::X86SelectBranch(const Instruction *I) {
1641  // Unconditional branches are selected by tablegen-generated code.
1642  // Handle a conditional branch.
1643  const BranchInst *BI = cast<BranchInst>(I);
1644  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1645  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1646 
1647  // Fold the common case of a conditional branch with a comparison
1648  // in the same block (values defined on other blocks may not have
1649  // initialized registers).
1650  X86::CondCode CC;
1651  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1652  if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
1653  EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1654 
1655  // Try to optimize or fold the cmp.
1656  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1657  switch (Predicate) {
1658  default: break;
1659  case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
1660  case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true;
1661  }
1662 
1663  const Value *CmpLHS = CI->getOperand(0);
1664  const Value *CmpRHS = CI->getOperand(1);
1665 
1666  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
1667  // 0.0.
1668  // We don't have to materialize a zero constant for this case and can just
1669  // use %x again on the RHS.
1670  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1671  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1672  if (CmpRHSC && CmpRHSC->isNullValue())
1673  CmpRHS = CmpLHS;
1674  }
1675 
1676  // Try to take advantage of fallthrough opportunities.
1677  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1678  std::swap(TrueMBB, FalseMBB);
1679  Predicate = CmpInst::getInversePredicate(Predicate);
1680  }
1681 
1682  // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
1683  // code check. Instead two branch instructions are required to check all
1684  // the flags. First we change the predicate to a supported condition code,
1685  // which will be the first branch. Later on we will emit the second
1686  // branch.
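  // For example, "br i1 (fcmp une float %a, %b), ..." is emitted roughly as
  //   UCOMISSrr %a, %b ; JCC_1 %true_bb, COND_NE ; JCC_1 %true_bb, COND_P
  // with the branch to %false_bb left to finishCondBranch (illustrative).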
1687  bool NeedExtraBranch = false;
1688  switch (Predicate) {
1689  default: break;
1690  case CmpInst::FCMP_OEQ:
1691  std::swap(TrueMBB, FalseMBB);
1692  LLVM_FALLTHROUGH;
1693  case CmpInst::FCMP_UNE:
1694  NeedExtraBranch = true;
1695  Predicate = CmpInst::FCMP_UNE;
1696  break;
1697  }
1698 
1699  bool SwapArgs;
1700  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1701  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1702 
1703  if (SwapArgs)
1704  std::swap(CmpLHS, CmpRHS);
1705 
1706  // Emit a compare of the LHS and RHS, setting the flags.
1707  if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
1708  return false;
1709 
1710  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1711  .addMBB(TrueMBB).addImm(CC);
1712 
1713  // X86 requires a second branch to handle UNE (and OEQ, which is mapped
1714  // to UNE above).
1715  if (NeedExtraBranch) {
1716  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1717  .addMBB(TrueMBB).addImm(X86::COND_P);
1718  }
1719 
1720  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1721  return true;
1722  }
1723  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1724  // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1725  // typically happen for _Bool and C++ bools.
1726  MVT SourceVT;
1727  if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1728  isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1729  unsigned TestOpc = 0;
1730  switch (SourceVT.SimpleTy) {
1731  default: break;
1732  case MVT::i8: TestOpc = X86::TEST8ri; break;
1733  case MVT::i16: TestOpc = X86::TEST16ri; break;
1734  case MVT::i32: TestOpc = X86::TEST32ri; break;
1735  case MVT::i64: TestOpc = X86::TEST64ri32; break;
1736  }
1737  if (TestOpc) {
1738  Register OpReg = getRegForValue(TI->getOperand(0));
1739  if (OpReg == 0) return false;
1740 
1741  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
1742  .addReg(OpReg).addImm(1);
1743 
1744  unsigned JmpCond = X86::COND_NE;
1745  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1746  std::swap(TrueMBB, FalseMBB);
1747  JmpCond = X86::COND_E;
1748  }
1749 
1750  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1751  .addMBB(TrueMBB).addImm(JmpCond);
1752 
1753  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1754  return true;
1755  }
1756  }
1757  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
1758  // Fake request the condition, otherwise the intrinsic might be completely
1759  // optimized away.
1760  Register TmpReg = getRegForValue(BI->getCondition());
1761  if (TmpReg == 0)
1762  return false;
1763 
1764  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1765  .addMBB(TrueMBB).addImm(CC);
1766  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1767  return true;
1768  }
1769 
1770  // Otherwise do a clumsy setcc and re-test it.
1771  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1772  // in an explicit cast, so make sure to handle that correctly.
1773  Register OpReg = getRegForValue(BI->getCondition());
1774  if (OpReg == 0) return false;
1775 
1776  // In case OpReg is a K register, COPY to a GPR
1777  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1778  unsigned KOpReg = OpReg;
1779  OpReg = createResultReg(&X86::GR32RegClass);
1780  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1781  TII.get(TargetOpcode::COPY), OpReg)
1782  .addReg(KOpReg);
1783  OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, X86::sub_8bit);
1784  }
1785  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1786  .addReg(OpReg)
1787  .addImm(1);
1788  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1789  .addMBB(TrueMBB).addImm(X86::COND_NE);
1790  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1791  return true;
1792 }
1793 
1794 bool X86FastISel::X86SelectShift(const Instruction *I) {
1795  unsigned CReg = 0, OpReg = 0;
1796  const TargetRegisterClass *RC = nullptr;
1797  if (I->getType()->isIntegerTy(8)) {
1798  CReg = X86::CL;
1799  RC = &X86::GR8RegClass;
1800  switch (I->getOpcode()) {
1801  case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1802  case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1803  case Instruction::Shl: OpReg = X86::SHL8rCL; break;
1804  default: return false;
1805  }
1806  } else if (I->getType()->isIntegerTy(16)) {
1807  CReg = X86::CX;
1808  RC = &X86::GR16RegClass;
1809  switch (I->getOpcode()) {
1810  default: llvm_unreachable("Unexpected shift opcode");
1811  case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1812  case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1813  case Instruction::Shl: OpReg = X86::SHL16rCL; break;
1814  }
1815  } else if (I->getType()->isIntegerTy(32)) {
1816  CReg = X86::ECX;
1817  RC = &X86::GR32RegClass;
1818  switch (I->getOpcode()) {
1819  default: llvm_unreachable("Unexpected shift opcode");
1820  case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1821  case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1822  case Instruction::Shl: OpReg = X86::SHL32rCL; break;
1823  }
1824  } else if (I->getType()->isIntegerTy(64)) {
1825  CReg = X86::RCX;
1826  RC = &X86::GR64RegClass;
1827  switch (I->getOpcode()) {
1828  default: llvm_unreachable("Unexpected shift opcode");
1829  case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1830  case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1831  case Instruction::Shl: OpReg = X86::SHL64rCL; break;
1832  }
1833  } else {
1834  return false;
1835  }
1836 
1837  MVT VT;
1838  if (!isTypeLegal(I->getType(), VT))
1839  return false;
1840 
1841  Register Op0Reg = getRegForValue(I->getOperand(0));
1842  if (Op0Reg == 0) return false;
1843 
1844  Register Op1Reg = getRegForValue(I->getOperand(1));
1845  if (Op1Reg == 0) return false;
1846  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
1847  CReg).addReg(Op1Reg);
1848 
1849  // The shift instruction uses X86::CL. If we defined a super-register
1850  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
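  // For example, "%r = shl i32 %a, %b" is emitted roughly as
  //   $ecx = COPY %b ; $cl = KILL killed $ecx ; %r:gr32 = SHL32rCL %a
  // (illustrative pseudo-MIR).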
1851  if (CReg != X86::CL)
1852  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1853  TII.get(TargetOpcode::KILL), X86::CL)
1854  .addReg(CReg, RegState::Kill);
1855 
1856  Register ResultReg = createResultReg(RC);
1857  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
1858  .addReg(Op0Reg);
1859  updateValueMap(I, ResultReg);
1860  return true;
1861 }
1862 
1863 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1864  const static unsigned NumTypes = 4; // i8, i16, i32, i64
1865  const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1866  const static bool S = true; // IsSigned
1867  const static bool U = false; // !IsSigned
1868  const static unsigned Copy = TargetOpcode::COPY;
1869  // For the X86 DIV/IDIV instruction, in most cases the dividend
1870  // (numerator) must be in a specific register pair highreg:lowreg,
1871  // producing the quotient in lowreg and the remainder in highreg.
1872  // For most data types, to set up the instruction, the dividend is
1873  // copied into lowreg, and lowreg is sign-extended or zero-extended
1874  // into highreg. The exception is i8, where the dividend is defined
1875  // as a single register rather than a register pair, and we
1876  // therefore directly sign-extend or zero-extend the dividend into
1877  // lowreg, instead of copying, and ignore the highreg.
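  // For example, "%q = sdiv i32 %a, %b" is emitted roughly as
  //   $eax = COPY %a ; CDQ ; IDIV32r %b ; %q:gr32 = COPY $eax
  // while "srem" copies the remainder out of $edx instead (an illustrative
  // sketch of the sequences encoded in OpTable below).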
1878  const static struct DivRemEntry {
1879  // The following portion depends only on the data type.
1880  const TargetRegisterClass *RC;
1881  unsigned LowInReg; // low part of the register pair
1882  unsigned HighInReg; // high part of the register pair
1883  // The following portion depends on both the data type and the operation.
1884  struct DivRemResult {
1885  unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1886  unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1887  // highreg, or copying a zero into highreg.
1888  unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1889  // zero/sign-extending into lowreg for i8.
1890  unsigned DivRemResultReg; // Register containing the desired result.
1891  bool IsOpSigned; // Whether to use signed or unsigned form.
1892  } ResultTable[NumOps];
1893  } OpTable[NumTypes] = {
1894  { &X86::GR8RegClass, X86::AX, 0, {
1895  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
1896  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
1897  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
1898  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
1899  }
1900  }, // i8
1901  { &X86::GR16RegClass, X86::AX, X86::DX, {
1902  { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
1903  { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
1904  { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
1905  { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
1906  }
1907  }, // i16
1908  { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1909  { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
1910  { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
1911  { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
1912  { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
1913  }
1914  }, // i32
1915  { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1916  { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
1917  { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
1918  { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
1919  { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
1920  }
1921  }, // i64
1922  };
1923 
1924  MVT VT;
1925  if (!isTypeLegal(I->getType(), VT))
1926  return false;
1927 
1928  unsigned TypeIndex, OpIndex;
1929  switch (VT.SimpleTy) {
1930  default: return false;
1931  case MVT::i8: TypeIndex = 0; break;
1932  case MVT::i16: TypeIndex = 1; break;
1933  case MVT::i32: TypeIndex = 2; break;
1934  case MVT::i64: TypeIndex = 3;
1935  if (!Subtarget->is64Bit())
1936  return false;
1937  break;
1938  }
1939 
1940  switch (I->getOpcode()) {
1941  default: llvm_unreachable("Unexpected div/rem opcode");
1942  case Instruction::SDiv: OpIndex = 0; break;
1943  case Instruction::SRem: OpIndex = 1; break;
1944  case Instruction::UDiv: OpIndex = 2; break;
1945  case Instruction::URem: OpIndex = 3; break;
1946  }
1947 
1948  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1949  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1950  Register Op0Reg = getRegForValue(I->getOperand(0));
1951  if (Op0Reg == 0)
1952  return false;
1953  Register Op1Reg = getRegForValue(I->getOperand(1));
1954  if (Op1Reg == 0)
1955  return false;
1956 
1957  // Move op0 into low-order input register.
1958  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1959  TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1960  // Zero-extend or sign-extend into high-order input register.
1961  if (OpEntry.OpSignExtend) {
1962  if (OpEntry.IsOpSigned)
1963  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1964  TII.get(OpEntry.OpSignExtend));
1965  else {
1966  Register Zero32 = createResultReg(&X86::GR32RegClass);
1967  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1968  TII.get(X86::MOV32r0), Zero32);
1969 
1970  // Copy the zero into the appropriate sub/super/identical physical
1971  // register. Unfortunately the operations needed are not uniform enough
1972  // to fit neatly into the table above.
1973  if (VT == MVT::i16) {
1974  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1975  TII.get(Copy), TypeEntry.HighInReg)
1976  .addReg(Zero32, 0, X86::sub_16bit);
1977  } else if (VT == MVT::i32) {
1978  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1979  TII.get(Copy), TypeEntry.HighInReg)
1980  .addReg(Zero32);
1981  } else if (VT == MVT::i64) {
1982  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1983  TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1984  .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1985  }
1986  }
1987  }
1988  // Generate the DIV/IDIV instruction.
1989  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1990  TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1991  // For i8 remainder, we can't reference AH directly, as we'll end
1992  // up with bogus copies like %r9b = COPY %ah. Reference AX
1993  // instead to prevent AH references in a REX-prefixed instruction.
1994  //
1995  // The current assumption of the fast register allocator is that isel
1996  // won't generate explicit references to the GR8_NOREX registers. If
1997  // the allocator and/or the backend get enhanced to be more robust in
1998  // that regard, this can be, and should be, removed.
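  //
  // For example, "%r = srem i8 %a, %b" on x86-64 is emitted roughly as
  //   $ax = MOVSX16rr8 %a ; IDIV8r %b
  //   %t:gr16 = COPY $ax ; %s:gr16 = SHR16ri %t, 8 ; %r:gr8 = COPY %s.sub_8bit
  // rather than copying out of $ah (illustrative pseudo-MIR).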
1999  unsigned ResultReg = 0;
2000  if ((I->getOpcode() == Instruction::SRem ||
2001  I->getOpcode() == Instruction::URem) &&
2002  OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
2003  Register SourceSuperReg = createResultReg(&X86::GR16RegClass);
2004  Register ResultSuperReg = createResultReg(&X86::GR16RegClass);
2005  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2006  TII.get(Copy), SourceSuperReg).addReg(X86::AX);
2007 
2008  // Shift AX right by 8 bits instead of using AH.
2009  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
2010  ResultSuperReg).addReg(SourceSuperReg).addImm(8);
2011 
2012  // Now reference the 8-bit subreg of the result.
2013  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
2014  X86::sub_8bit);
2015  }
2016  // Copy the result out of the physreg if we haven't already.
2017  if (!ResultReg) {
2018  ResultReg = createResultReg(TypeEntry.RC);
2019  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
2020  .addReg(OpEntry.DivRemResultReg);
2021  }
2022  updateValueMap(I, ResultReg);
2023 
2024  return true;
2025 }
2026 
2027 /// Emit a conditional move instruction (if they are supported) to lower
2028 /// the select.
2029 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
2030  // Check if the subtarget supports these instructions.
2031  if (!Subtarget->canUseCMOV())
2032  return false;
2033 
2034  // FIXME: Add support for i8.
2035  if (RetVT < MVT::i16 || RetVT > MVT::i64)
2036  return false;
2037 
2038  const Value *Cond = I->getOperand(0);
2039  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2040  bool NeedTest = true;
2041  X86::CondCode CC = X86::COND_NE;
2042 
2043  // Optimize conditions coming from a compare if both instructions are in the
2044  // same basic block (values defined in other basic blocks may not have
2045  // initialized registers).
2046  const auto *CI = dyn_cast<CmpInst>(Cond);
2047  if (CI && (CI->getParent() == I->getParent())) {
2048  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2049 
2050  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
2051  static const uint16_t SETFOpcTable[2][3] = {
2052  { X86::COND_NP, X86::COND_E, X86::TEST8rr },
2053  { X86::COND_P, X86::COND_NE, X86::OR8rr }
2054  };
2055  const uint16_t *SETFOpc = nullptr;
2056  switch (Predicate) {
2057  default: break;
2058  case CmpInst::FCMP_OEQ:
2059  SETFOpc = &SETFOpcTable[0][0];
2060  Predicate = CmpInst::ICMP_NE;
2061  break;
2062  case CmpInst::FCMP_UNE:
2063  SETFOpc = &SETFOpcTable[1][0];
2064  Predicate = CmpInst::ICMP_NE;
2065  break;
2066  }
2067 
2068  bool NeedSwap;
2069  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
2070  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
2071 
2072  const Value *CmpLHS = CI->getOperand(0);
2073  const Value *CmpRHS = CI->getOperand(1);
2074  if (NeedSwap)
2075  std::swap(CmpLHS, CmpRHS);
2076 
2077  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2078  // Emit a compare of the LHS and RHS, setting the flags.
2079  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2080  return false;
2081 
2082  if (SETFOpc) {
2083  Register FlagReg1 = createResultReg(&X86::GR8RegClass);
2084  Register FlagReg2 = createResultReg(&X86::GR8RegClass);
2085  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2086  FlagReg1).addImm(SETFOpc[0]);
2087  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2088  FlagReg2).addImm(SETFOpc[1]);
2089  auto const &II = TII.get(SETFOpc[2]);
2090  if (II.getNumDefs()) {
2091  Register TmpReg = createResultReg(&X86::GR8RegClass);
2092  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
2093  .addReg(FlagReg2).addReg(FlagReg1);
2094  } else {
2095  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2096  .addReg(FlagReg2).addReg(FlagReg1);
2097  }
2098  }
2099  NeedTest = false;
2100  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
2101  // Fake request the condition, otherwise the intrinsic might be completely
2102  // optimized away.
2103  Register TmpReg = getRegForValue(Cond);
2104  if (TmpReg == 0)
2105  return false;
2106 
2107  NeedTest = false;
2108  }
2109 
2110  if (NeedTest) {
2111  // Selects operate on i1; however, CondReg is 8 bits wide and may contain
2112  // garbage. Indeed, only the least significant bit is supposed to be
2113  // accurate. If we read more than the lsb, we may see non-zero values
2114  // where the lsb is zero. Therefore, we have to truncate CondReg to i1 for
2115  // the select. This is achieved by performing a TEST against 1.
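  // For example, if the condition lives in %c:gr8 this emits "TEST8ri %c, 1",
  // and the CMOV emitted below keys off COND_NE (illustrative).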
2116  Register CondReg = getRegForValue(Cond);
2117  if (CondReg == 0)
2118  return false;
2119 
2120  // In case CondReg is a K register, COPY to a GPR
2121  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2122  unsigned KCondReg = CondReg;
2123  CondReg = createResultReg(&X86::GR32RegClass);
2124  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2125  TII.get(TargetOpcode::COPY), CondReg)
2126  .addReg(KCondReg);
2127  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);
2128  }
2129  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2130  .addReg(CondReg)
2131  .addImm(1);
2132  }
2133 
2134  const Value *LHS = I->getOperand(1);
2135  const Value *RHS = I->getOperand(2);
2136 
2137  Register RHSReg = getRegForValue(RHS);
2138  Register LHSReg = getRegForValue(LHS);
2139  if (!LHSReg || !RHSReg)
2140  return false;
2141 
2142  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
2143  unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(*RC)/8);
2144  Register ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
2145  updateValueMap(I, ResultReg);
2146  return true;
2147 }
2148 
2149 /// Emit SSE or AVX instructions to lower the select.
2150 ///
2151 /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
2152 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
2153 /// SSE instructions are available. If AVX is available, try to use a VBLENDV.
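///
/// For example, without AVX "select (fcmp olt float %a, %b), float %x, float %y"
/// is emitted roughly as:
///   %m = CMPSSrr %a, %b, 1   ; 1 = LT predicate, all-ones/all-zeros mask
///   %t = ANDPSrr %m, %x
///   %f = ANDNPSrr %m, %y
///   %r = ORPSrr %f, %t
/// (an illustrative sketch of the SSE fallback path below).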
2154 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
2155  // Optimize conditions coming from a compare if both instructions are in the
2156  // same basic block (values defined in other basic blocks may not have
2157  // initialized registers).
2158  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
2159  if (!CI || (CI->getParent() != I->getParent()))
2160  return false;
2161 
2162  if (I->getType() != CI->getOperand(0)->getType() ||
2163  !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
2164  (Subtarget->hasSSE2() && RetVT == MVT::f64)))
2165  return false;
2166 
2167  const Value *CmpLHS = CI->getOperand(0);
2168  const Value *CmpRHS = CI->getOperand(1);
2169  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2170 
2171  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
2172  // We don't have to materialize a zero constant for this case and can just use
2173  // %x again on the RHS.
2174  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
2175  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
2176  if (CmpRHSC && CmpRHSC->isNullValue())
2177  CmpRHS = CmpLHS;
2178  }
2179 
2180  unsigned CC;
2181  bool NeedSwap;
2182  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
2183  if (CC > 7 && !Subtarget->hasAVX())
2184  return false;
2185 
2186  if (NeedSwap)
2187  std::swap(CmpLHS, CmpRHS);
2188 
2189  const Value *LHS = I->getOperand(1);
2190  const Value *RHS = I->getOperand(2);
2191 
2192  Register LHSReg = getRegForValue(LHS);
2193  Register RHSReg = getRegForValue(RHS);
2194  Register CmpLHSReg = getRegForValue(CmpLHS);
2195  Register CmpRHSReg = getRegForValue(CmpRHS);
2196  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
2197  return false;
2198 
2199  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2200  unsigned ResultReg;
2201 
2202  if (Subtarget->hasAVX512()) {
2203  // If we have AVX512 we can use a mask compare and masked movss/sd.
2204  const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
2205  const TargetRegisterClass *VK1 = &X86::VK1RegClass;
2206 
2207  unsigned CmpOpcode =
2208  (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
2209  Register CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpRHSReg,
2210  CC);
2211 
2212  // Need an IMPLICIT_DEF for the input that is used to generate the upper
2213  // bits of the result register since it's not based on any of the inputs.
2214  Register ImplicitDefReg = createResultReg(VR128X);
2215  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2216  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2217 
2218  // Place RHSReg in the passthru of the masked movss/sd operation and put
2219  // LHS in the input. The mask input comes from the compare.
2220  unsigned MovOpcode =
2221  (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
2222  unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, CmpReg,
2223  ImplicitDefReg, LHSReg);
2224 
2225  ResultReg = createResultReg(RC);
2226  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2227  TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
2228 
2229  } else if (Subtarget->hasAVX()) {
2230  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2231 
2232  // If we have AVX, create 1 blendv instead of 3 logic instructions.
2233  // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
2234  // uses XMM0 as the selection register. That may need just as many
2235  // instructions as the AND/ANDN/OR sequence due to register moves, so
2236  // don't bother.
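      // For example, with AVX "select (fcmp olt float %a, %b), float %x, float %y"
      // becomes roughly:
      //   %m = VCMPSSrr %a, %b, 1        ; 1 = LT predicate
      //   %r = VBLENDVPSrr %y, %x, %m    ; picks %x where the mask is set
      // (an illustrative sketch).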
2237  unsigned CmpOpcode =
2238  (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
2239  unsigned BlendOpcode =
2240  (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
2241 
2242  Register CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpRHSReg,
2243  CC);
2244  Register VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, LHSReg,
2245  CmpReg);
2246  ResultReg = createResultReg(RC);
2247  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2248  TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
2249  } else {
2250  // Choose the SSE instruction sequence based on data type (float or double).
2251  static const uint16_t OpcTable[2][4] = {
2252  { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
2253  { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
2254  };
2255 
2256  const uint16_t *Opc = nullptr;
2257  switch (RetVT.SimpleTy) {
2258  default: return false;
2259  case MVT::f32: Opc = &OpcTable[0][0]; break;
2260  case MVT::f64: Opc = &OpcTable[1][0]; break;
2261  }
2262 
2263  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2264  Register CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpRHSReg, CC);
2265  Register AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, LHSReg);
2266  Register AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, RHSReg);
2267  Register OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, AndReg);
2268  ResultReg = createResultReg(RC);
2269  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2270  TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
2271  }
2272  updateValueMap(I, ResultReg);
2273  return true;
2274 }
2275 
2276 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
2277  // These are pseudo CMOV instructions and will be later expanded into control-
2278  // flow.
2279  unsigned Opc;
2280  switch (RetVT.SimpleTy) {
2281  default: return false;
2282  case MVT::i8: Opc = X86::CMOV_GR8; break;
2283  case MVT::i16: Opc = X86::CMOV_GR16; break;
2284  case MVT::f16: Opc = X86::CMOV_FR16X; break;
2285  case MVT::i32: Opc = X86::CMOV_GR32; break;
2286  case MVT::f32: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X
2287  : X86::CMOV_FR32; break;
2288  case MVT::f64: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X
2289  : X86::CMOV_FR64; break;
2290  }
2291 
2292  const Value *Cond = I->getOperand(0);
2293  X86::CondCode CC = X86::COND_NE;
2294 
2295  // Optimize conditions coming from a compare if both instructions are in the
2296  // same basic block (values defined in other basic blocks may not have
2297  // initialized registers).
2298  const auto *CI = dyn_cast<CmpInst>(Cond);
2299  if (CI && (CI->getParent() == I->getParent())) {
2300  bool NeedSwap;
2301  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
2302  if (CC > X86::LAST_VALID_COND)
2303  return false;
2304 
2305  const Value *CmpLHS = CI->getOperand(0);
2306  const Value *CmpRHS = CI->getOperand(1);
2307 
2308  if (NeedSwap)
2309  std::swap(CmpLHS, CmpRHS);
2310 
2311  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2312  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2313  return false;
2314  } else {
2315  Register CondReg = getRegForValue(Cond);
2316  if (CondReg == 0)
2317  return false;
2318 
2319  // In case CondReg is a K register, COPY to a GPR
2320  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2321  unsigned KCondReg = CondReg;
2322  CondReg = createResultReg(&X86::GR32RegClass);
2323  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2324  TII.get(TargetOpcode::COPY), CondReg)
2325  .addReg(KCondReg);
2326  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);
2327  }
2328  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2329  .addReg(CondReg)
2330  .addImm(1);
2331  }
2332 
2333  const Value *LHS = I->getOperand(1);
2334  const Value *RHS = I->getOperand(2);
2335 
2336  Register LHSReg = getRegForValue(LHS);
2337  Register RHSReg = getRegForValue(RHS);
2338  if (!LHSReg || !RHSReg)
2339  return false;
2340 
2341  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2342 
2343  Register ResultReg =
2344  fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
2345  updateValueMap(I, ResultReg);
2346  return true;
2347 }
2348 
2349 bool X86FastISel::X86SelectSelect(const Instruction *I) {
2350  MVT RetVT;
2351  if (!isTypeLegal(I->getType(), RetVT))
2352  return false;
2353 
2354  // Check if we can fold the select.
2355  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2356  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2357  const Value *Opnd = nullptr;
2358  switch (Predicate) {
2359  default: break;
2360  case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
2361  case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
2362  }
2363  // No need for a select anymore - this is an unconditional move.
2364  if (Opnd) {
2365  Register OpReg = getRegForValue(Opnd);
2366  if (OpReg == 0)
2367  return false;
2368  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2369  Register ResultReg = createResultReg(RC);
2370  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2371  TII.get(TargetOpcode::COPY), ResultReg)
2372  .addReg(OpReg);
2373  updateValueMap(I, ResultReg);
2374  return true;
2375  }
2376  }
2377 
2378  // First try to use real conditional move instructions.
2379  if (X86FastEmitCMoveSelect(RetVT, I))
2380  return true;
2381 
2382  // Try to use a sequence of SSE instructions to simulate a conditional move.
2383  if (X86FastEmitSSESelect(RetVT, I))
2384  return true;
2385 
2386  // Fall back to pseudo conditional move instructions, which will later be
2387  // converted to control-flow.
2388  if (X86FastEmitPseudoSelect(RetVT, I))
2389  return true;
2390 
2391  return false;
2392 }
2393 
2394 // Common code for X86SelectSIToFP and X86SelectUIToFP.
2395 bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
2396  // The target-independent selection algorithm in FastISel already knows how
2397  // to select a SINT_TO_FP if the target has SSE but not AVX.
2398  // Early exit if the subtarget doesn't have AVX.
2399  // Unsigned conversion requires AVX512.
2400  bool HasAVX512 = Subtarget->hasAVX512();
2401  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
2402  return false;
2403 
2404  // TODO: We could sign extend narrower types.
2405  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
2406  if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
2407  return false;
2408 
2409  // Select integer to float/double conversion.
2410  Register OpReg = getRegForValue(I->getOperand(0));
2411  if (OpReg == 0)
2412  return false;
2413 
2414  unsigned Opcode;
2415 
2416  static const uint16_t SCvtOpc[2][2][2] = {
2417  { { X86::VCVTSI2SSrr, X86::VCVTSI642SSrr },
2418  { X86::VCVTSI2SDrr, X86::VCVTSI642SDrr } },
2419  { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
2420  { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
2421  };
2422  static const uint16_t UCvtOpc[2][2] = {
2423  { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
2424  { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
2425  };
2426  bool Is64Bit = SrcVT == MVT::i64;
2427 
2428  if (I->getType()->isDoubleTy()) {
2429  // s/uitofp int -> double
2430  Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
2431  } else if (I->getType()->isFloatTy()) {
2432  // s/uitofp int -> float
2433  Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
2434  } else
2435  return false;
2436 
2437  MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
2438  const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
2439  Register ImplicitDefReg = createResultReg(RC);
2440  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2441  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2442  Register ResultReg = fastEmitInst_rr(Opcode, RC, ImplicitDefReg, OpReg);
2443  updateValueMap(I, ResultReg);
2444  return true;
2445 }
2446 
2447 bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
2448  return X86SelectIntToFP(I, /*IsSigned*/true);
2449 }
2450 
2451 bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
2452  return X86SelectIntToFP(I, /*IsSigned*/false);
2453 }
2454 
2455 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
2456 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
2457  unsigned TargetOpc,
2458  const TargetRegisterClass *RC) {
2459  assert((I->getOpcode() == Instruction::FPExt ||
2460  I->getOpcode() == Instruction::FPTrunc) &&
2461  "Instruction must be an FPExt or FPTrunc!");
2462  bool HasAVX = Subtarget->hasAVX();
2463 
2464  Register OpReg = getRegForValue(I->getOperand(0));
2465  if (OpReg == 0)
2466  return false;
2467 
2468  unsigned ImplicitDefReg;
2469  if (HasAVX) {
2470  ImplicitDefReg = createResultReg(RC);
2471  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2472  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2473 
2474  }
2475 
2476  Register ResultReg = createResultReg(RC);
2477  MachineInstrBuilder MIB;
2478  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
2479  ResultReg);
2480 
2481  if (HasAVX)
2482  MIB.addReg(ImplicitDefReg);
2483 
2484  MIB.addReg(OpReg);
2485  updateValueMap(I, ResultReg);
2486  return true;
2487 }
2488 
2489 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2490  if (Subtarget->hasSSE2() && I->getType()->isDoubleTy() &&
2491  I->getOperand(0)->getType()->isFloatTy()) {
2492  bool HasAVX512 = Subtarget->hasAVX512();
2493  // fpext from float to double.
2494  unsigned Opc =
2495  HasAVX512 ? X86::VCVTSS2SDZrr
2496  : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
2497  return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f64));
2498  }
2499 
2500  return false;
2501 }
2502 
2503 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2504  if (Subtarget->hasSSE2() && I->getType()->isFloatTy() &&
2505  I->getOperand(0)->getType()->isDoubleTy()) {
2506  bool HasAVX512 = Subtarget->hasAVX512();
2507  // fptrunc from double to float.
2508  unsigned Opc =
2509  HasAVX512 ? X86::VCVTSD2SSZrr
2510  : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
2511  return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f32));
2512  }
2513 
2514  return false;
2515 }
2516 
2517 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2518  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
2519  EVT DstVT = TLI.getValueType(DL, I->getType());
2520 
2521  // This code only handles truncation to byte.
2522  if (DstVT != MVT::i8 && DstVT != MVT::i1)
2523  return false;
2524  if (!TLI.isTypeLegal(SrcVT))
2525  return false;
2526 
2527  Register InputReg = getRegForValue(I->getOperand(0));
2528  if (!InputReg)
2529  // Unhandled operand. Halt "fast" selection and bail.
2530  return false;
2531 
2532  if (SrcVT == MVT::i8) {
2533  // Truncate from i8 to i1; no code needed.
2534  updateValueMap(I, InputReg);
2535  return true;
2536  }
2537 
2538  // Issue an extract_subreg.
2539  Register ResultReg = fastEmitInst_extractsubreg(MVT::i8, InputReg,
2540  X86::sub_8bit);
2541  if (!ResultReg)
2542  return false;
2543 
2544  updateValueMap(I, ResultReg);
2545  return true;
2546 }
2547 
2548 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2549  return Len <= (Subtarget->is64Bit() ? 32 : 16);
2550 }
2551 
2552 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
2553  X86AddressMode SrcAM, uint64_t Len) {
2554 
2555  // Make sure we don't bloat code by inlining very large memcpy's.
2556  if (!IsMemcpySmall(Len))
2557  return false;
2558 
2559  bool i64Legal = Subtarget->is64Bit();
2560 
2561  // We don't care about alignment here since we just emit integer accesses.
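  // For example, an 11-byte copy on x86-64 is lowered as one i64, one i16, and
  // one i8 load/store pair (a rough illustration of the loop below).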
2562  while (Len) {
2563  MVT VT;
2564  if (Len >= 8 && i64Legal)
2565  VT = MVT::i64;
2566  else if (Len >= 4)
2567  VT = MVT::i32;
2568  else if (Len >= 2)
2569  VT = MVT::i16;
2570  else
2571  VT = MVT::i8;
2572 
2573  unsigned Reg;
2574  bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2575  RV &= X86FastEmitStore(VT, Reg, DestAM);
2576  assert(RV && "Failed to emit load or store??");
2577  (void)RV;
2578 
2579  unsigned Size = VT.getSizeInBits()/8;
2580  Len -= Size;
2581  DestAM.Disp += Size;
2582  SrcAM.Disp += Size;
2583  }
2584 
2585  return true;
2586 }
2587 
2588 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2589  // FIXME: Handle more intrinsics.
2590  switch (II->getIntrinsicID()) {
2591  default: return false;
2592  case Intrinsic::convert_from_fp16:
2593  case Intrinsic::convert_to_fp16: {
2594  if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
2595  return false;
2596 
2597  const Value *Op = II->getArgOperand(0);
2598  Register InputReg = getRegForValue(Op);
2599  if (InputReg == 0)
2600  return false;
2601 
2602  // F16C only allows converting from float to half and from half to float.
2603  bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
2604  if (IsFloatToHalf) {
2605  if (!Op->getType()->isFloatTy())
2606  return false;
2607  } else {
2608  if (!II->getType()->isFloatTy())
2609  return false;
2610  }
2611 
2612  unsigned ResultReg = 0;
2613  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
2614  if (IsFloatToHalf) {
2615  // 'InputReg' is implicitly promoted from register class FR32 to
2616  // register class VR128 by method 'constrainOperandRegClass' which is
2617  // directly called by 'fastEmitInst_ri'.
2618  // Instruction VCVTPS2PHrr takes an extra immediate operand which is
2619  // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
2620  // It's consistent with the other FP instructions, which are usually
2621  // controlled by MXCSR.
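      // For example, "@llvm.convert.to.fp16(float %x)" is lowered roughly as
      //   %v:vr128 = VCVTPS2PHrr %x, 4    ; 4 selects MXCSR.RC rounding
      //   %g:gr32  = VMOVPDI2DIrr %v
      //   %h:gr16  = COPY %g.sub_16bit    ; the fp16 bit pattern
      // (illustrative pseudo-MIR for the non-VLX path).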
2622  unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPS2PHZ128rr
2623  : X86::VCVTPS2PHrr;
2624  InputReg = fastEmitInst_ri(Opc, RC, InputReg, 4);
2625 
2626  // Move the lower 32-bits of ResultReg to another register of class GR32.
2627  Opc = Subtarget->hasAVX512() ? X86::VMOVPDI2DIZrr
2628  : X86::VMOVPDI2DIrr;
2629  ResultReg = createResultReg(&X86::GR32RegClass);
2630  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2631  .addReg(InputReg, RegState::Kill);
2632 
2633  // The result value is in the lower 16-bits of ResultReg.
2634  unsigned RegIdx = X86::sub_16bit;
2635  ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, RegIdx);
2636  } else {
2637  assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
2638  // Explicitly zero-extend the input to 32-bit.
2639  InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::ZERO_EXTEND, InputReg);
2640 
2641  // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
2642  InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
2643  InputReg);
2644 
2645  unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPH2PSZ128rr
2646  : X86::VCVTPH2PSrr;
2647  InputReg = fastEmitInst_r(Opc, RC, InputReg);
2648 
2649  // The result value is in the lower 32-bits of ResultReg.
2650  // Emit an explicit copy from register class VR128 to register class FR32.
2651  ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
2652  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2653  TII.get(TargetOpcode::COPY), ResultReg)
2654  .addReg(InputReg, RegState::Kill);
2655  }
2656 
2657  updateValueMap(II, ResultReg);
2658  return true;
2659  }
2660  case Intrinsic::frameaddress: {
2661  MachineFunction *MF = FuncInfo.MF;
2662  if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
2663  return false;
2664 
2665  Type *RetTy = II->getCalledFunction()->getReturnType();
2666 
2667  MVT VT;
2668  if (!isTypeLegal(RetTy, VT))
2669  return false;
2670 
2671  unsigned Opc;
2672  const TargetRegisterClass *RC = nullptr;
2673 
2674  switch (VT.SimpleTy) {
2675  default: llvm_unreachable("Invalid result type for frameaddress.");
2676  case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2677  case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2678  }
2679 
2680  // This needs to be set before we call getPtrSizedFrameRegister, otherwise
2681  // we get the wrong frame register.
2682  MachineFrameInfo &MFI = MF->getFrameInfo();
2683  MFI.setFrameAddressIsTaken(true);
2684 
2685  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2686  unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
2687  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
2688  (FrameReg == X86::EBP && VT == MVT::i32)) &&
2689  "Invalid Frame Register!");
2690 
2691  // Always make a copy of the frame register to a vreg first, so that we
2692  // never directly reference the frame register (the TwoAddressInstruction-
2693  // Pass doesn't like that).
2694  Register SrcReg = createResultReg(RC);
2695  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2696  TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2697 
2698  // Now recursively load from the frame address.
2699  // movq (%rbp), %rax
2700  // movq (%rax), %rax
2701  // movq (%rax), %rax
2702  // ...
2703  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2704  while (Depth--) {
2705  Register DestReg = createResultReg(RC);
2706  addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2707  TII.get(Opc), DestReg), SrcReg);
2708  SrcReg = DestReg;
2709  }
2710 
2711  updateValueMap(II, SrcReg);
2712  return true;
2713  }
2714  case Intrinsic::memcpy: {
2715  const MemCpyInst *MCI = cast<MemCpyInst>(II);
2716  // Don't handle volatile or variable length memcpys.
2717  if (MCI->isVolatile())
2718  return false;
2719 
2720  if (isa<ConstantInt>(MCI->getLength())) {
2721  // Small memcpy's are common enough that we want to do them
2722  // without a call if possible.
2723  uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
2724  if (IsMemcpySmall(Len)) {
2725  X86AddressMode DestAM, SrcAM;
2726  if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
2727  !X86SelectAddress(MCI->getRawSource(), SrcAM))
2728  return false;
2729  TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2730  return true;
2731  }
2732  }
2733 
2734  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2735  if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
2736  return false;
2737 
2738  if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
2739  return false;
2740 
2741  return lowerCallTo(II, "memcpy", II->arg_size() - 1);
2742  }
2743  case Intrinsic::memset: {
2744  const MemSetInst *MSI = cast<MemSetInst>(II);
2745 
2746  if (MSI->isVolatile())
2747  return false;
2748 
2749  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2750  if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
2751  return false;
2752 
2753  if (MSI->getDestAddressSpace() > 255)
2754  return false;
2755 
2756  return lowerCallTo(II, "memset", II->arg_size() - 1);
2757  }
2758  case Intrinsic::stackprotector: {
2759  // Emit code to store the stack guard onto the stack.
2760  EVT PtrTy = TLI.getPointerTy(DL);
2761 
2762  const Value *Op1 = II->getArgOperand(0); // The guard's value.
2763  const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
2764 
2765  MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
2766 
2767  // Grab the frame index.
2768  X86AddressMode AM;
2769  if (!X86SelectAddress(Slot, AM)) return false;
2770  if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2771  return true;
2772  }
2773  case Intrinsic::dbg_declare: {
2774  const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
2775  X86AddressMode AM;
2776  assert(DI->getAddress() && "Null address should be checked earlier!");
2777  if (!X86SelectAddress(DI->getAddress(), AM))
2778  return false;
2779  const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
2780  assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
2781  "Expected inlined-at fields to agree");
2782  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
2783  .addImm(0)
2784  .addMetadata(DI->getVariable())
2785  .addMetadata(DI->getExpression());
2786  return true;
2787  }
2788  case Intrinsic::trap: {
2789  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
2790  return true;
2791  }
2792  case Intrinsic::sqrt: {
2793  if (!Subtarget->hasSSE1())
2794  return false;
2795 
2796  Type *RetTy = II->getCalledFunction()->getReturnType();
2797 
2798  MVT VT;
2799  if (!isTypeLegal(RetTy, VT))
2800  return false;
2801 
2802  // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
2803  // is not generated by FastISel yet.
2804  // FIXME: Update this code once tablegen can handle it.
2805  static const uint16_t SqrtOpc[3][2] = {
2806  { X86::SQRTSSr, X86::SQRTSDr },
2807  { X86::VSQRTSSr, X86::VSQRTSDr },
2808  { X86::VSQRTSSZr, X86::VSQRTSDZr },
2809  };
2810  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
2811  Subtarget->hasAVX() ? 1 :
2812  0;
2813  unsigned Opc;
2814  switch (VT.SimpleTy) {
2815  default: return false;
2816  case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
2817  case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
2818  }
2819 
2820  const Value *SrcVal = II->getArgOperand(0);
2821  Register SrcReg = getRegForValue(SrcVal);
2822 
2823  if (SrcReg == 0)
2824  return false;
2825 
2826  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
2827  unsigned ImplicitDefReg = 0;
2828  if (AVXLevel > 0) {
2829  ImplicitDefReg = createResultReg(RC);
2830  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2831  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2832  }
2833 
2834  Register ResultReg = createResultReg(RC);
2835  MachineInstrBuilder MIB;
2836  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2837  ResultReg);
2838 
2839  if (ImplicitDefReg)
2840  MIB.addReg(ImplicitDefReg);
2841 
2842  MIB.addReg(SrcReg);
2843 
2844  updateValueMap(II, ResultReg);
2845  return true;
2846  }
2847  case Intrinsic::sadd_with_overflow:
2848  case Intrinsic::uadd_with_overflow:
2849  case Intrinsic::ssub_with_overflow:
2850  case Intrinsic::usub_with_overflow:
2851  case Intrinsic::smul_with_overflow:
2852  case Intrinsic::umul_with_overflow: {
2853  // This implements the basic lowering of the xalu with overflow intrinsics
2854  // into add/sub/mul followed by either seto or setb.
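  // For example, "@llvm.sadd.with.overflow.i32(i32 %a, i32 %b)" becomes roughly
  //   %sum:gr32 = ADD32rr %a, %b
  //   %ovf:gr8  = SETCCr COND_O
  // with the two results kept in consecutive vregs (an illustrative sketch).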
2855  const Function *Callee = II->getCalledFunction();
2856  auto *Ty = cast<StructType>(Callee->getReturnType());
2857  Type *RetTy = Ty->getTypeAtIndex(0U);
2858  assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
2859  Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
2860  "Overflow value expected to be an i1");
2861 
2862  MVT VT;
2863  if (!isTypeLegal(RetTy, VT))
2864  return false;
2865 
2866  if (VT < MVT::i8 || VT > MVT::i64)
2867  return false;
2868 
2869  const Value *LHS = II->getArgOperand(0);
2870  const Value *RHS = II->getArgOperand(1);
2871 
2872  // Canonicalize immediate to the RHS.
2873  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
2874  std::swap(LHS, RHS);
2875 
2876  unsigned BaseOpc, CondCode;
2877  switch (II->getIntrinsicID()) {
2878  default: llvm_unreachable("Unexpected intrinsic!");
2879  case Intrinsic::sadd_with_overflow:
2880  BaseOpc = ISD::ADD; CondCode = X86::COND_O; break;
2881  case Intrinsic::uadd_with_overflow:
2882  BaseOpc = ISD::ADD; CondCode = X86::COND_B; break;
2883  case Intrinsic::ssub_with_overflow:
2884  BaseOpc = ISD::SUB; CondCode = X86::COND_O; break;
2885  case Intrinsic::usub_with_overflow:
2886  BaseOpc = ISD::SUB; CondCode = X86::COND_B; break;
2887  case Intrinsic::smul_with_overflow:
2888  BaseOpc = X86ISD::SMUL; CondCode = X86::COND_O; break;
2889  case Intrinsic::umul_with_overflow:
2890  BaseOpc = X86ISD::UMUL; CondCode = X86::COND_O; break;
2891  }
2892 
2893  Register LHSReg = getRegForValue(LHS);
2894  if (LHSReg == 0)
2895  return false;
2896 
2897  unsigned ResultReg = 0;
2898  // Check if we have an immediate version.
2899  if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
2900  static const uint16_t Opc[2][4] = {
2901  { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
2902  { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
2903  };
2904 
2905  if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
2906  CondCode == X86::COND_O) {
2907  // We can use INC/DEC.
2908  ResultReg = createResultReg(TLI.getRegClassFor(VT));
2909  bool IsDec = BaseOpc == ISD::SUB;
2910  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2911  TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
2912  .addReg(LHSReg);
2913  } else
2914  ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, CI->getZExtValue());
2915  }
2916 
2917  unsigned RHSReg;
2918  if (!ResultReg) {
2919  RHSReg = getRegForValue(RHS);
2920  if (RHSReg == 0)
2921  return false;
2922  ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, RHSReg);
2923  }
2924 
2925  // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
2926  // it manually.
2927  if (BaseOpc == X86ISD::UMUL && !ResultReg) {
2928  static const uint16_t MULOpc[] =
2929  { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2930  static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
2931  // First copy the first operand into RAX, which is an implicit input to
2932  // the X86::MUL*r instruction.
2933  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2934  TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2935  .addReg(LHSReg);
2936  ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2937  TLI.getRegClassFor(VT), RHSReg);
2938  } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
2939  static const uint16_t MULOpc[] =
2940  { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2941  if (VT == MVT::i8) {
2942  // Copy the first operand into AL, which is an implicit input to the
2943  // X86::IMUL8r instruction.
2944  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2945  TII.get(TargetOpcode::COPY), X86::AL)
2946  .addReg(LHSReg);
2947  ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg);
2948  } else
2949  ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2950  TLI.getRegClassFor(VT), LHSReg, RHSReg);
2951  }
2952 
2953  if (!ResultReg)
2954  return false;
2955 
2956  // Assign to a GPR since the overflow return value is lowered to a SETcc.
2957  Register ResultReg2 = createResultReg(&X86::GR8RegClass);
2958  assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2959  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2960  ResultReg2).addImm(CondCode);
2961 
2962  updateValueMap(II, ResultReg, 2);
2963  return true;
2964  }
2965  case Intrinsic::x86_sse_cvttss2si:
2966  case Intrinsic::x86_sse_cvttss2si64:
2967  case Intrinsic::x86_sse2_cvttsd2si:
2968  case Intrinsic::x86_sse2_cvttsd2si64: {
2969  bool IsInputDouble;
2970  switch (II->getIntrinsicID()) {
2971  default: llvm_unreachable("Unexpected intrinsic.");
2972  case Intrinsic::x86_sse_cvttss2si:
2973  case Intrinsic::x86_sse_cvttss2si64:
2974  if (!Subtarget->hasSSE1())
2975  return false;
2976  IsInputDouble = false;
2977  break;
2978  case Intrinsic::x86_sse2_cvttsd2si:
2979  case Intrinsic::x86_sse2_cvttsd2si64:
2980  if (!Subtarget->hasSSE2())
2981  return false;
2982  IsInputDouble = true;
2983  break;
2984  }
2985 
2986  Type *RetTy = II->getCalledFunction()->getReturnType();
2987  MVT VT;
2988  if (!isTypeLegal(RetTy, VT))
2989  return false;
2990 
2991  static const uint16_t CvtOpc[3][2][2] = {
2992  { { X86::CVTTSS2SIrr, X86::CVTTSS2SI64rr },
2993  { X86::CVTTSD2SIrr, X86::CVTTSD2SI64rr } },
2994  { { X86::VCVTTSS2SIrr, X86::VCVTTSS2SI64rr },
2995  { X86::VCVTTSD2SIrr, X86::VCVTTSD2SI64rr } },
2996  { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
2997  { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
2998  };
2999  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
3000  Subtarget->hasAVX() ? 1 :
3001  0;
3002  unsigned Opc;
3003  switch (VT.SimpleTy) {
3004  default: llvm_unreachable("Unexpected result type.");
3005  case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
3006  case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
3007  }
3008 
3009  // Check if we can fold insertelement instructions into the convert.
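  // For example, for
  //   %v = insertelement <4 x float> undef, float %s, i32 0
  //   %r = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %v)
  // the convert below reads %s directly rather than materializing %v
  // (illustrative).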
3010  const Value *Op = II->getArgOperand(0);
3011  while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
3012  const Value *Index = IE->getOperand(2);
3013  if (!isa<ConstantInt>(Index))
3014  break;
3015  unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
3016 
3017  if (Idx == 0) {
3018  Op = IE->getOperand(1);
3019  break;
3020  }
3021  Op = IE->getOperand(0);
3022  }
3023 
3024  Register Reg = getRegForValue(Op);
3025  if (Reg == 0)
3026  return false;
3027 
3028  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3029  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3030  .addReg(Reg);
3031 
3032  updateValueMap(II, ResultReg);
3033  return true;
3034  }
3035  }
3036 }
3037 
3038 bool X86FastISel::fastLowerArguments() {
3039  if (!FuncInfo.CanLowerReturn)
3040  return false;
3041 
3042  const Function *F = FuncInfo.Fn;
3043  if (F->isVarArg())
3044  return false;
3045 
3046  CallingConv::ID CC = F->getCallingConv();
3047  if (CC != CallingConv::C)
3048  return false;
3049 
3050  if (Subtarget->isCallingConvWin64(CC))
3051  return false;
3052 
3053  if (!Subtarget->is64Bit())
3054  return false;
3055 
3056  if (Subtarget->useSoftFloat())
3057  return false;
3058 
3059  // Only handle simple cases, i.e. up to 6 i32/i64 and 8 f32/f64 scalar arguments.
3060  unsigned GPRCnt = 0;
3061  unsigned FPRCnt = 0;
3062  for (auto const &Arg : F->args()) {
3063  if (Arg.hasAttribute(Attribute::ByVal) ||
3064  Arg.hasAttribute(Attribute::InReg) ||
3065  Arg.hasAttribute(Attribute::StructRet) ||
3066  Arg.hasAttribute(Attribute::SwiftSelf) ||
3067  Arg.hasAttribute(Attribute::SwiftAsync) ||
3068  Arg.hasAttribute(Attribute::SwiftError) ||
3069  Arg.hasAttribute(Attribute::Nest))
3070  return false;
3071 
3072  Type *ArgTy = Arg.getType();
3073  if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3074  return false;
3075 
3076  EVT ArgVT = TLI.getValueType(DL, ArgTy);
3077  if (!ArgVT.isSimple()) return false;
3078  switch (ArgVT.getSimpleVT().SimpleTy) {
3079  default: return false;
3080  case MVT::i32:
3081  case MVT::i64:
3082  ++GPRCnt;
3083  break;
3084  case MVT::f32:
3085  case MVT::f64:
3086  if (!Subtarget->hasSSE1())
3087  return false;
3088  ++FPRCnt;
3089  break;
3090  }
3091 
3092  if (GPRCnt > 6)
3093  return false;
3094 
3095  if (FPRCnt > 8)
3096  return false;
3097  }
3098 
3099  static const MCPhysReg GPR32ArgRegs[] = {
3100  X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
3101  };
3102  static const MCPhysReg GPR64ArgRegs[] = {
3103  X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
3104  };
3105  static const MCPhysReg XMMArgRegs[] = {
3106  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3107  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3108  };
3109 
3110  unsigned GPRIdx = 0;
3111  unsigned FPRIdx = 0;
3112  for (auto const &Arg : F->args()) {
3113  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3114  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
3115  unsigned SrcReg;
3116  switch (VT.SimpleTy) {
3117  default: llvm_unreachable("Unexpected value type.");
3118  case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
3119  case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
3120  case MVT::f32: LLVM_FALLTHROUGH;
3121  case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
3122  }
3123  Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3124  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3125  // Without this, EmitLiveInCopies may eliminate the livein if its only
3126  // use is a bitcast (which isn't turned into an instruction).
3127  Register ResultReg = createResultReg(RC);
3128  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3129  TII.get(TargetOpcode::COPY), ResultReg)
3130  .addReg(DstReg, getKillRegState(true));
3131  updateValueMap(&Arg, ResultReg);
3132  }
3133  return true;
3134 }
3135 
3136 static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
3137  CallingConv::ID CC,
3138  const CallBase *CB) {
3139  if (Subtarget->is64Bit())
3140  return 0;
3141  if (Subtarget->getTargetTriple().isOSMSVCRT())
3142  return 0;
3143  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3144  CC == CallingConv::HiPE || CC == CallingConv::Tail ||
3145  CC == CallingConv::SwiftTail)
3146  return 0;
3147 
3148  if (CB)
3149  if (CB->arg_empty() || !CB->paramHasAttr(0, Attribute::StructRet) ||
3150  CB->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
3151  return 0;
3152 
3153  return 4;
3154 }
3155 
3156 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3157  auto &OutVals = CLI.OutVals;
3158  auto &OutFlags = CLI.OutFlags;
3159  auto &OutRegs = CLI.OutRegs;
3160  auto &Ins = CLI.Ins;
3161  auto &InRegs = CLI.InRegs;
3162  CallingConv::ID CC = CLI.CallConv;
3163  bool &IsTailCall = CLI.IsTailCall;
3164  bool IsVarArg = CLI.IsVarArg;
3165  const Value *Callee = CLI.Callee;
3166  MCSymbol *Symbol = CLI.Symbol;
3167  const auto *CB = CLI.CB;
3168 
3169  bool Is64Bit = Subtarget->is64Bit();
3170  bool IsWin64 = Subtarget->isCallingConvWin64(CC);
3171 
3172  // Call / invoke instructions with NoCfCheck attribute require special
3173  // handling.
3174  if (CB && CB->doesNoCfCheck())
3175  return false;
3176 
3177  // Functions with the no_caller_saved_registers attribute need special handling.
3178  if ((CB && isa<CallInst>(CB) && CB->hasFnAttr("no_caller_saved_registers")))
3179  return false;
3180 
3181  // Functions with the no_callee_saved_registers attribute need special handling.
3182  if ((CB && CB->hasFnAttr("no_callee_saved_registers")))
3183  return false;
3184 
3185  // Functions using thunks for indirect calls need to use SDISel.
3186  if (Subtarget->useIndirectThunkCalls())
3187  return false;
3188 
3189  // Handle only C, fastcc, and webkit_js calling conventions for now.
3190  switch (CC) {
3191  default: return false;
3192  case CallingConv::C:
3193  case CallingConv::Fast:
3194  case CallingConv::Tail:
3195  case CallingConv::WebKit_JS:
3196  case CallingConv::Swift:
3197  case CallingConv::SwiftTail:
3198  case CallingConv::X86_FastCall:
3199  case CallingConv::X86_StdCall:
3200  case CallingConv::X86_ThisCall:
3201  case CallingConv::Win64:
3202  case CallingConv::X86_64_SysV:
3203  case CallingConv::CFGuard_Check:
3204  break;
3205  }
3206 
3207  // Allow SelectionDAG isel to handle tail calls.
3208  if (IsTailCall)
3209  return false;
3210 
3211  // fastcc with -tailcallopt is intended to provide a guaranteed
3212  // tail call optimization. Fastisel doesn't know how to do that.
3213  if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
3214  CC == CallingConv::Tail || CC == CallingConv::SwiftTail)
3215  return false;
3216 
3217  // Don't know how to handle Win64 varargs yet. Nothing special needed for
3218  // x86-32. Special handling for x86-64 is implemented.
3219  if (IsVarArg && IsWin64)
3220  return false;
3221 
3222  // Don't know about inalloca yet.
3223  if (CLI.CB && CLI.CB->hasInAllocaArgument())
3224  return false;
3225 
3226  for (auto Flag : CLI.OutFlags)
3227  if (Flag.isSwiftError() || Flag.isPreallocated())
3228  return false;
3229 
3230  SmallVector<MVT, 16> OutVTs;
3231  SmallVector<unsigned, 16> ArgRegs;
3232 
3233  // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
3234  // instruction. This is safe because it is common to all FastISel supported
3235  // calling conventions on x86.
3236  for (int i = 0, e = OutVals.size(); i != e; ++i) {
3237  Value *&Val = OutVals[i];
3238  ISD::ArgFlagsTy Flags = OutFlags[i];
3239  if (auto *CI = dyn_cast<ConstantInt>(Val)) {
3240  if (CI->getBitWidth() < 32) {
3241  if (Flags.isSExt())
3242  Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
3243  else
3244  Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
3245  }
3246  }
3247 
3248  // Passing bools around ends up doing a trunc to i1 and passing it.
3249  // Codegen this as an argument + "and 1".
3250  MVT VT;
3251  auto *TI = dyn_cast<TruncInst>(Val);
3252  unsigned ResultReg;
3253  if (TI && TI->getType()->isIntegerTy(1) && CLI.CB &&
3254  (TI->getParent() == CLI.CB->getParent()) && TI->hasOneUse()) {
3255  Value *PrevVal = TI->getOperand(0);
3256  ResultReg = getRegForValue(PrevVal);
3257 
3258  if (!ResultReg)
3259  return false;
3260 
3261  if (!isTypeLegal(PrevVal->getType(), VT))
3262  return false;
3263 
3264  ResultReg = fastEmit_ri(VT, VT, ISD::AND, ResultReg, 1);
3265  } else {
3266  if (!isTypeLegal(Val->getType(), VT) ||
3267  (VT.isVector() && VT.getVectorElementType() == MVT::i1))
3268  return false;
3269  ResultReg = getRegForValue(Val);
3270  }
3271 
3272  if (!ResultReg)
3273  return false;
3274 
3275  ArgRegs.push_back(ResultReg);
3276  OutVTs.push_back(VT);
3277  }
3278 
3279  // Analyze operands of the call, assigning locations to each operand.
3280  SmallVector<CCValAssign, 16> ArgLocs;
3281  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
3282 
3283  // Allocate shadow area for Win64
3284  if (IsWin64)
3285  CCInfo.AllocateStack(32, Align(8));
3286 
3287  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
3288 
3289  // Get a count of how many bytes are to be pushed on the stack.
3290  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3291 
3292  // Issue CALLSEQ_START
3293  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3294  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3295  .addImm(NumBytes).addImm(0).addImm(0);
3296 
3297  // Walk the register/memloc assignments, inserting copies/loads.
3298  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3299  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3300  CCValAssign const &VA = ArgLocs[i];
3301  const Value *ArgVal = OutVals[VA.getValNo()];
3302  MVT ArgVT = OutVTs[VA.getValNo()];
3303 
3304  if (ArgVT == MVT::x86mmx)
3305  return false;
3306 
3307  unsigned ArgReg = ArgRegs[VA.getValNo()];
3308 
3309  // Promote the value if needed.
3310  switch (VA.getLocInfo()) {
3311  case CCValAssign::Full: break;
3312  case CCValAssign::SExt: {
3313  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3314  "Unexpected extend");
3315 
3316  if (ArgVT == MVT::i1)
3317  return false;
3318 
3319  bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3320  ArgVT, ArgReg);
3321  assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
3322  ArgVT = VA.getLocVT();
3323  break;
3324  }
3325  case CCValAssign::ZExt: {
3326  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3327  "Unexpected extend");
3328 
3329  // Handle zero-extension from i1 to i8, which is common.
3330  if (ArgVT == MVT::i1) {
3331  // Set the high bits to zero.
3332  ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg);
3333  ArgVT = MVT::i8;
3334 
3335  if (ArgReg == 0)
3336  return false;
3337  }
3338 
3339  bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3340  ArgVT, ArgReg);
3341  assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
3342  ArgVT = VA.getLocVT();
3343  break;
3344  }
3345  case CCValAssign::AExt: {
3346  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3347  "Unexpected extend");
3348  bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
3349  ArgVT, ArgReg);
3350  if (!Emitted)
3351  Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3352  ArgVT, ArgReg);
3353  if (!Emitted)
3354  Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3355  ArgVT, ArgReg);
3356 
3357  assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
3358  ArgVT = VA.getLocVT();
3359  break;
3360  }
3361  case CCValAssign::BCvt: {
3362  ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg);
3363  assert(ArgReg && "Failed to emit a bitcast!");
3364  ArgVT = VA.getLocVT();
3365  break;
3366  }
3367  case CCValAssign::VExt:
3368  // VExt has not been implemented, so this should be impossible to reach
3369  // for now. However, fall back to SelectionDAG isel once implemented.
3370  return false;
3371  case CCValAssign::AExtUpper:
3372  case CCValAssign::SExtUpper:
3373  case CCValAssign::ZExtUpper:
3374  case CCValAssign::FPExt:
3375  case CCValAssign::Trunc:
3376  llvm_unreachable("Unexpected loc info!");
3377  case CCValAssign::Indirect:
3378  // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
3379  // support this.
3380  return false;
3381  }
3382 
3383  if (VA.isRegLoc()) {
3384  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3385  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3386  OutRegs.push_back(VA.getLocReg());
3387  } else {
3388  assert(VA.isMemLoc() && "Unknown value location!");
3389 
3390  // Don't emit stores for undef values.
3391  if (isa<UndefValue>(ArgVal))
3392  continue;
3393 
3394  unsigned LocMemOffset = VA.getLocMemOffset();
3395  X86AddressMode AM;
3396  AM.Base.Reg = RegInfo->getStackRegister();
3397  AM.Disp = LocMemOffset;
3398  ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
3399  Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3400  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3401  MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
3402  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3403  if (Flags.isByVal()) {
3404  X86AddressMode SrcAM;
3405  SrcAM.Base.Reg = ArgReg;
3406  if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
3407  return false;
3408  } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
3409  // If this is a really simple value, emit this with the Value* version
3410  // of X86FastEmitStore. If it isn't simple, we don't want to do this,
3411  // as it can cause us to reevaluate the argument.
3412  if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
3413  return false;
3414  } else {
3415  if (!X86FastEmitStore(ArgVT, ArgReg, AM, MMO))
3416  return false;
3417  }
3418  }
3419  }
3420 
3421  // ELF / PIC requires the GOT pointer in the EBX register before function
3422  // calls made via the PLT.
3423  if (Subtarget->isPICStyleGOT()) {
3424  unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3425  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3426  TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
3427  }
3428 
3429  if (Is64Bit && IsVarArg && !IsWin64) {
3430  // From AMD64 ABI document:
3431  // For calls that may call functions that use varargs or stdargs
3432  // (prototype-less calls or calls to functions containing ellipsis (...) in
3433  // the declaration), %al is used as a hidden argument to specify the number
3434  // of SSE registers used. The contents of %al do not need to match exactly
3435  // the number of registers, but must be an upper bound on the number of SSE
3436  // registers used, in the range 0 - 8 inclusive.
3437 
3438  // Count the number of XMM registers allocated.
3439  static const MCPhysReg XMMArgRegs[] = {
3440  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3441  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3442  };
3443  unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3444  assert((Subtarget->hasSSE1() || !NumXMMRegs)
3445  && "SSE registers cannot be used when SSE is disabled");
3446  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
3447  X86::AL).addImm(NumXMMRegs);
3448  }
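As an aside on the ABI rule the comment above paraphrases: before a SysV x86-64 call to a variadic or unprototyped function, %al must hold an upper bound (0-8) on the number of vector registers carrying arguments. A minimal, hypothetical illustration (not part of the source):

// The double argument travels in %xmm0, so a conforming compiler sets %al to
// at least 1 (and at most 8) before this call.
#include <cstdio>
int main() {
  std::printf("%d %f\n", 42, 3.14);
  return 0;
}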
3449 
3450  // Materialize callee address in a register. FIXME: GV address can be
3451  // handled with a CALLpcrel32 instead.
3452  X86AddressMode CalleeAM;
3453  if (!X86SelectCallAddress(Callee, CalleeAM))
3454  return false;
3455 
3456  unsigned CalleeOp = 0;
3457  const GlobalValue *GV = nullptr;
3458  if (CalleeAM.GV != nullptr) {
3459  GV = CalleeAM.GV;
3460  } else if (CalleeAM.Base.Reg != 0) {
3461  CalleeOp = CalleeAM.Base.Reg;
3462  } else
3463  return false;
3464 
3465  // Issue the call.
3466  MachineInstrBuilder MIB;
3467  if (CalleeOp) {
3468  // Register-indirect call.
3469  unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
3470  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
3471  .addReg(CalleeOp);
3472  } else {
3473  // Direct call.
3474  assert(GV && "Not a direct call");
3475  // See if we need any target-specific flags on the GV operand.
3476  unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
3477 
3478  // This will be a direct call, or an indirect call through memory for
3479  // NonLazyBind calls or dllimport calls.
3480  bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT ||
3481  OpFlags == X86II::MO_GOTPCREL ||
3482  OpFlags == X86II::MO_GOTPCREL_NORELAX ||
3483  OpFlags == X86II::MO_COFFSTUB;
3484  unsigned CallOpc = NeedLoad
3485  ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
3486  : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
3487 
3488  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
3489  if (NeedLoad)
3490  MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
3491  if (Symbol)
3492  MIB.addSym(Symbol, OpFlags);
3493  else
3494  MIB.addGlobalAddress(GV, 0, OpFlags);
3495  if (NeedLoad)
3496  MIB.addReg(0);
3497  }
3498 
3499  // Add a register mask operand representing the call-preserved registers.
3500  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3501  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3502 
3503  // Add an implicit use GOT pointer in EBX.
3504  if (Subtarget->isPICStyleGOT())
3505  MIB.addReg(X86::EBX, RegState::Implicit);
3506 
3507  if (Is64Bit && IsVarArg && !IsWin64)
3508  MIB.addReg(X86::AL, RegState::Implicit);
3509 
3510  // Add implicit physical register uses to the call.
3511  for (auto Reg : OutRegs)
3512  MIB.addReg(Reg, RegState::Implicit);
3513 
3514  // Issue CALLSEQ_END
3515  unsigned NumBytesForCalleeToPop =
3516  X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
3517  TM.Options.GuaranteedTailCallOpt)
3518  ? NumBytes // Callee pops everything.
3519  : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CB);
3520  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3521  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3522  .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
3523 
3524  // Now handle call return values.
3525  SmallVector<CCValAssign, 16> RVLocs;
3526  CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
3527  CLI.RetTy->getContext());
3528  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
3529 
3530  // Copy all of the result registers out of their specified physreg.
3531  Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3532  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3533  CCValAssign &VA = RVLocs[i];
3534  EVT CopyVT = VA.getValVT();
3535  unsigned CopyReg = ResultReg + i;
3536  Register SrcReg = VA.getLocReg();
3537 
3538  // If this is x86-64, and we disabled SSE, we can't return FP values
3539  if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
3540  ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
3541  report_fatal_error("SSE register return with SSE disabled");
3542  }
3543 
3544  // If we prefer to use the value in xmm registers, copy it out as f80 and
3545  // use a truncate to move it from fp stack reg to xmm reg.
3546  if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
3547  isScalarFPTypeInSSEReg(VA.getValVT())) {
3548  CopyVT = MVT::f80;
3549  CopyReg = createResultReg(&X86::RFP80RegClass);
3550  }
3551 
3552  // Copy out the result.
3553  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3554  TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
3555  InRegs.push_back(VA.getLocReg());
3556 
3557  // Round the f80 to the right size, which also moves it to the appropriate
3558  // xmm register. This is accomplished by storing the f80 value in memory
3559  // and then loading it back.
3560  if (CopyVT != VA.getValVT()) {
3561  EVT ResVT = VA.getValVT();
3562  unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3563  unsigned MemSize = ResVT.getSizeInBits()/8;
3564  int FI = MFI.CreateStackObject(MemSize, Align(MemSize), false);
3565  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3566  TII.get(Opc)), FI)
3567  .addReg(CopyReg);
3568  Opc = ResVT == MVT::f32 ? X86::MOVSSrm_alt : X86::MOVSDrm_alt;
3569  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3570  TII.get(Opc), ResultReg + i), FI);
3571  }
3572  }
3573 
3574  CLI.ResultReg = ResultReg;
3575  CLI.NumResultRegs = RVLocs.size();
3576  CLI.Call = MIB;
3577 
3578  return true;
3579 }
3580 
3581 bool
3582 X86FastISel::fastSelectInstruction(const Instruction *I) {
3583  switch (I->getOpcode()) {
3584  default: break;
3585  case Instruction::Load:
3586  return X86SelectLoad(I);
3587  case Instruction::Store:
3588  return X86SelectStore(I);
3589  case Instruction::Ret:
3590  return X86SelectRet(I);
3591  case Instruction::ICmp:
3592  case Instruction::FCmp:
3593  return X86SelectCmp(I);
3594  case Instruction::ZExt:
3595  return X86SelectZExt(I);
3596  case Instruction::SExt:
3597  return X86SelectSExt(I);
3598  case Instruction::Br:
3599  return X86SelectBranch(I);
3600  case Instruction::LShr:
3601  case Instruction::AShr:
3602  case Instruction::Shl:
3603  return X86SelectShift(I);
3604  case Instruction::SDiv:
3605  case Instruction::UDiv:
3606  case Instruction::SRem:
3607  case Instruction::URem:
3608  return X86SelectDivRem(I);
3609  case Instruction::Select:
3610  return X86SelectSelect(I);
3611  case Instruction::Trunc:
3612  return X86SelectTrunc(I);
3613  case Instruction::FPExt:
3614  return X86SelectFPExt(I);
3615  case Instruction::FPTrunc:
3616  return X86SelectFPTrunc(I);
3617  case Instruction::SIToFP:
3618  return X86SelectSIToFP(I);
3619  case Instruction::UIToFP:
3620  return X86SelectUIToFP(I);
3621  case Instruction::IntToPtr: // Deliberate fall-through.
3622  case Instruction::PtrToInt: {
3623  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3624  EVT DstVT = TLI.getValueType(DL, I->getType());
3625  if (DstVT.bitsGT(SrcVT))
3626  return X86SelectZExt(I);
3627  if (DstVT.bitsLT(SrcVT))
3628  return X86SelectTrunc(I);
3629  Register Reg = getRegForValue(I->getOperand(0));
3630  if (Reg == 0) return false;
3631  updateValueMap(I, Reg);
3632  return true;
3633  }
3634  case Instruction::BitCast: {
3635  // Select SSE2/AVX bitcasts between 128/256/512 bit vector types.
3636  if (!Subtarget->hasSSE2())
3637  return false;
3638 
3639  MVT SrcVT, DstVT;
3640  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT) ||
3641  !isTypeLegal(I->getType(), DstVT))
3642  return false;
3643 
3644  // Only allow vectors that use xmm/ymm/zmm.
3645  if (!SrcVT.isVector() || !DstVT.isVector() ||
3646  SrcVT.getVectorElementType() == MVT::i1 ||
3647  DstVT.getVectorElementType() == MVT::i1)
3648  return false;
3649 
3650  Register Reg = getRegForValue(I->getOperand(0));
3651  if (!Reg)
3652  return false;
3653 
3654  // Emit a reg-reg copy so we don't propagate cached known bits information
3655  // with the wrong VT if we fall out of fast isel after selecting this.
3656  const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
3657  Register ResultReg = createResultReg(DstClass);
3658  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3659  TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg);
3660 
3661  updateValueMap(I, ResultReg);
3662  return true;
3663  }
3664  }
3665 
3666  return false;
3667 }
3668 
3669 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
3670  if (VT > MVT::i64)
3671  return 0;
3672 
3673  uint64_t Imm = CI->getZExtValue();
3674  if (Imm == 0) {
3675  Register SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
3676  switch (VT.SimpleTy) {
3677  default: llvm_unreachable("Unexpected value type");
3678  case MVT::i1:
3679  case MVT::i8:
3680  return fastEmitInst_extractsubreg(MVT::i8, SrcReg, X86::sub_8bit);
3681  case MVT::i16:
3682  return fastEmitInst_extractsubreg(MVT::i16, SrcReg, X86::sub_16bit);
3683  case MVT::i32:
3684  return SrcReg;
3685  case MVT::i64: {
3686  Register ResultReg = createResultReg(&X86::GR64RegClass);
3687  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3688  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3689  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3690  return ResultReg;
3691  }
3692  }
3693  }
3694 
3695  unsigned Opc = 0;
3696  switch (VT.SimpleTy) {
3697  default: llvm_unreachable("Unexpected value type");
3698  case MVT::i1:
3699  VT = MVT::i8;
3700  LLVM_FALLTHROUGH;
3701  case MVT::i8: Opc = X86::MOV8ri; break;
3702  case MVT::i16: Opc = X86::MOV16ri; break;
3703  case MVT::i32: Opc = X86::MOV32ri; break;
3704  case MVT::i64: {
3705  if (isUInt<32>(Imm))
3706  Opc = X86::MOV32ri64;
3707  else if (isInt<32>(Imm))
3708  Opc = X86::MOV64ri32;
3709  else
3710  Opc = X86::MOV64ri;
3711  break;
3712  }
3713  }
3714  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
3715 }
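The 64-bit path above simply picks the cheapest mov encoding that can represent the constant. A self-contained sketch of the same decision in plain C++ (editor's illustration; classifyImm64 is a made-up name standing in for the isUInt&lt;32&gt;/isInt&lt;32&gt; checks):

#include <cstdint>

enum class ImmKind {
  Mov32ZeroExt,  // X86::MOV32ri64: 32-bit immediate, implicitly zero-extended
  Mov64SignExt,  // X86::MOV64ri32: sign-extended 32-bit immediate
  Mov64Full      // X86::MOV64ri:   full 64-bit immediate (movabs)
};

// Prefer the shorter encodings whenever the value fits, otherwise fall back
// to the 10-byte movabs form, mirroring the switch above.
ImmKind classifyImm64(uint64_t Imm) {
  if (Imm <= UINT32_MAX)                        // isUInt<32>(Imm)
    return ImmKind::Mov32ZeroExt;
  if (static_cast<int64_t>(Imm) >= INT32_MIN &&
      static_cast<int64_t>(Imm) <= INT32_MAX)   // isInt<32>(Imm)
    return ImmKind::Mov64SignExt;
  return ImmKind::Mov64Full;
}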
3716 
3717 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
3718  if (CFP->isNullValue())
3719  return fastMaterializeFloatZero(CFP);
3720 
3721  // Can't handle alternate code models yet.
3722  CodeModel::Model CM = TM.getCodeModel();
3723  if (CM != CodeModel::Small && CM != CodeModel::Large)
3724  return 0;
3725 
3726  // Get opcode and regclass of the output for the given load instruction.
3727  unsigned Opc = 0;
3728  bool HasSSE1 = Subtarget->hasSSE1();
3729  bool HasSSE2 = Subtarget->hasSSE2();
3730  bool HasAVX = Subtarget->hasAVX();
3731  bool HasAVX512 = Subtarget->hasAVX512();
3732  switch (VT.SimpleTy) {
3733  default: return 0;
3734  case MVT::f32:
3735  Opc = HasAVX512 ? X86::VMOVSSZrm_alt
3736  : HasAVX ? X86::VMOVSSrm_alt
3737  : HasSSE1 ? X86::MOVSSrm_alt
3738  : X86::LD_Fp32m;
3739  break;
3740  case MVT::f64:
3741  Opc = HasAVX512 ? X86::VMOVSDZrm_alt
3742  : HasAVX ? X86::VMOVSDrm_alt
3743  : HasSSE2 ? X86::MOVSDrm_alt
3744  : X86::LD_Fp64m;
3745  break;
3746  case MVT::f80:
3747  // No f80 support yet.
3748  return 0;
3749  }
3750 
3751  // MachineConstantPool wants an explicit alignment.
3752  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
3753 
3754  // x86-32 PIC requires a PIC base register for constant pools.
3755  unsigned PICBase = 0;
3756  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
3757  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
3758  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3759  else if (OpFlag == X86II::MO_GOTOFF)
3760  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3761  else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
3762  PICBase = X86::RIP;
3763 
3764  // Create the load from the constant pool.
3765  unsigned CPI = MCP.getConstantPoolIndex(CFP, Alignment);
3766  Register ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
3767 
3768  // Large code model only applies to 64-bit mode.
3769  if (Subtarget->is64Bit() && CM == CodeModel::Large) {
3770  Register AddrReg = createResultReg(&X86::GR64RegClass);
3771  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3772  AddrReg)
3773  .addConstantPoolIndex(CPI, 0, OpFlag);
3774  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3775  TII.get(Opc), ResultReg);
3776  addRegReg(MIB, AddrReg, false, PICBase, false);
3777  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3778  MachinePointerInfo::getConstantPool(*FuncInfo.MF),
3779  MachineMemOperand::MOLoad, DL.getPointerSize(), Alignment);
3780  MIB->addMemOperand(*FuncInfo.MF, MMO);
3781  return ResultReg;
3782  }
3783 
3784  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3785  TII.get(Opc), ResultReg),
3786  CPI, PICBase, OpFlag);
3787  return ResultReg;
3788 }
3789 
3790 unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
3791  // Can't handle alternate code models yet.
3792  if (TM.getCodeModel() != CodeModel::Small)
3793  return 0;
3794 
3795  // Materialize addresses with LEA/MOV instructions.
3796  X86AddressMode AM;
3797  if (X86SelectAddress(GV, AM)) {
3798  // If the expression is just a basereg, then we're done, otherwise we need
3799  // to emit an LEA.
3800  if (AM.BaseType == X86AddressMode::RegBase &&
3801  AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
3802  return AM.Base.Reg;
3803 
3804  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3805  if (TM.getRelocationModel() == Reloc::Static &&
3806  TLI.getPointerTy(DL) == MVT::i64) {
3807  // The displacement code could be more than 32 bits away so we need to use
3808  // an instruction with a 64 bit immediate
3809  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3810  ResultReg)
3811  .addGlobalAddress(GV);
3812  } else {
3813  unsigned Opc =
3814  TLI.getPointerTy(DL) == MVT::i32
3815  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3816  : X86::LEA64r;
3817  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3818  TII.get(Opc), ResultReg), AM);
3819  }
3820  return ResultReg;
3821  }
3822  return 0;
3823 }
3824 
3825 unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
3826  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
3827 
3828  // Only handle simple types.
3829  if (!CEVT.isSimple())
3830  return 0;
3831  MVT VT = CEVT.getSimpleVT();
3832 
3833  if (const auto *CI = dyn_cast<ConstantInt>(C))
3834  return X86MaterializeInt(CI, VT);
3835  if (const auto *CFP = dyn_cast<ConstantFP>(C))
3836  return X86MaterializeFP(CFP, VT);
3837  if (const auto *GV = dyn_cast<GlobalValue>(C))
3838  return X86MaterializeGV(GV, VT);
3839  if (isa<UndefValue>(C)) {
3840  unsigned Opc = 0;
3841  switch (VT.SimpleTy) {
3842  default:
3843  break;
3844  case MVT::f32:
3845  if (!Subtarget->hasSSE1())
3846  Opc = X86::LD_Fp032;
3847  break;
3848  case MVT::f64:
3849  if (!Subtarget->hasSSE2())
3850  Opc = X86::LD_Fp064;
3851  break;
3852  case MVT::f80:
3853  Opc = X86::LD_Fp080;
3854  break;
3855  }
3856 
3857  if (Opc) {
3858  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3859  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
3860  ResultReg);
3861  return ResultReg;
3862  }
3863  }
3864 
3865  return 0;
3866 }
3867 
3868 unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
3869  // Fail on dynamic allocas. At this point, getRegForValue has already
3870  // checked its CSE maps, so if we're here trying to handle a dynamic
3871  // alloca, we're not going to succeed. X86SelectAddress has a
3872  // check for dynamic allocas, because it's called directly from
3873  // various places, but targetMaterializeAlloca also needs a check
3874  // in order to avoid recursion between getRegForValue,
3875  // X86SelectAddress, and targetMaterializeAlloca.
3876  if (!FuncInfo.StaticAllocaMap.count(C))
3877  return 0;
3878  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
3879 
3880  X86AddressMode AM;
3881  if (!X86SelectAddress(C, AM))
3882  return 0;
3883  unsigned Opc =
3884  TLI.getPointerTy(DL) == MVT::i32
3885  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3886  : X86::LEA64r;
3887  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
3888  Register ResultReg = createResultReg(RC);
3889  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3890  TII.get(Opc), ResultReg), AM);
3891  return ResultReg;
3892 }
3893 
3894 unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
3895  MVT VT;
3896  if (!isTypeLegal(CF->getType(), VT))
3897  return 0;
3898 
3899  // Get opcode and regclass for the given zero.
3900  bool HasSSE1 = Subtarget->hasSSE1();
3901  bool HasSSE2 = Subtarget->hasSSE2();
3902  bool HasAVX512 = Subtarget->hasAVX512();
3903  unsigned Opc = 0;
3904  switch (VT.SimpleTy) {
3905  default: return 0;
3906  case MVT::f32:
3907  Opc = HasAVX512 ? X86::AVX512_FsFLD0SS
3908  : HasSSE1 ? X86::FsFLD0SS
3909  : X86::LD_Fp032;
3910  break;
3911  case MVT::f64:
3912  Opc = HasAVX512 ? X86::AVX512_FsFLD0SD
3913  : HasSSE2 ? X86::FsFLD0SD
3914  : X86::LD_Fp064;
3915  break;
3916  case MVT::f80:
3917  // No f80 support yet.
3918  return 0;
3919  }
3920 
3921  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3922  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
3923  return ResultReg;
3924 }
3925 
3926 
3927 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
3928  const LoadInst *LI) {
3929  const Value *Ptr = LI->getPointerOperand();
3930  X86AddressMode AM;
3931  if (!X86SelectAddress(Ptr, AM))
3932  return false;
3933 
3934  const X86InstrInfo &XII = (const X86InstrInfo &)TII;
3935 
3936  unsigned Size = DL.getTypeAllocSize(LI->getType());
3937 
3937 
3938  SmallVector<MachineOperand, 8> AddrOps;
3939  AM.getFullAddress(AddrOps);
3940 
3941  MachineInstr *Result = XII.foldMemoryOperandImpl(
3942  *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, LI->getAlign(),
3943  /*AllowCommute=*/true);
3944  if (!Result)
3945  return false;
3946 
3947  // The index register could be in the wrong register class. Unfortunately,
3948  // foldMemoryOperandImpl could have commuted the instruction, so it's not enough
3949  // to just look at OpNo + the offset to the index reg. We actually need to
3950  // scan the instruction to find the index reg and see if it's in the correct reg
3951  // class.
3952  unsigned OperandNo = 0;
3953  for (MachineInstr::mop_iterator I = Result->operands_begin(),
3954  E = Result->operands_end(); I != E; ++I, ++OperandNo) {
3955  MachineOperand &MO = *I;
3956  if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
3957  continue;
3958  // Found the index reg, now try to rewrite it.
3959  Register IndexReg = constrainOperandRegClass(Result->getDesc(),
3960  MO.getReg(), OperandNo);
3961  if (IndexReg == MO.getReg())
3962  continue;
3963  MO.setReg(IndexReg);
3964  }
3965 
3966  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
3967  Result->cloneInstrSymbols(*FuncInfo.MF, *MI);
3968  MachineBasicBlock::iterator I(MI);
3969  removeDeadCode(I, std::next(I));
3970  return true;
3971 }
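Put briefly, this hook lets FastISel fold a one-use load into the instruction that consumes it, so the value is read straight from memory instead of going through a scratch register. A hypothetical source-level example of the pattern it targets (not from this file):

// With the load folded, a typical x86-64 compiler reads *p directly in the
// add (e.g. "addl (%rdi), %eax") instead of emitting a separate movl first.
int addThrough(const int *p, int x) {
  return x + *p;
}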
3972 
3973 unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
3974  const TargetRegisterClass *RC,
3975  unsigned Op0, unsigned Op1,
3976  unsigned Op2, unsigned Op3) {
3977  const MCInstrDesc &II = TII.get(MachineInstOpcode);
3978 
3979  Register ResultReg = createResultReg(RC);
3980  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
3981  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
3982  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
3983  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
3984 
3985  if (II.getNumDefs() >= 1)
3986  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
3987  .addReg(Op0)
3988  .addReg(Op1)
3989  .addReg(Op2)
3990  .addReg(Op3);
3991  else {
3992  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
3993  .addReg(Op0)
3994  .addReg(Op1)
3995  .addReg(Op2)
3996  .addReg(Op3);
3997  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3998  TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
3999  }
4000  return ResultReg;
4001 }
4002 
4003 
4004 namespace llvm {
4005  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
4006  const TargetLibraryInfo *libInfo) {
4007  return new X86FastISel(funcInfo, libInfo);
4008  }
4009 }
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::Check::Size
@ Size
Definition: FileCheck.h:76
llvm::addRegReg
static const MachineInstrBuilder & addRegReg(const MachineInstrBuilder &MIB, unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2)
addRegReg - This function is used to add a memory reference of the form: [Reg + Reg].
Definition: X86InstrBuilder.h:164
llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition: CallingConvLower.h:151
i
i
Definition: README.txt:29
llvm::MCInstrDesc::getNumDefs
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:245
llvm::CmpInst::FCMP_ULE
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:734
llvm::CCValAssign::ZExt
@ ZExt
Definition: CallingConvLower.h:36
llvm::DbgVariableIntrinsic::getExpression
DIExpression * getExpression() const
Definition: IntrinsicInst.h:258
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
llvm::CallingConv::X86_FastCall
@ X86_FastCall
X86_FastCall - 'fast' analog of X86_StdCall.
Definition: CallingConv.h:107
llvm::CallingConv::X86_64_SysV
@ X86_64_SysV
The C convention as specified in the x86-64 supplement to the System V ABI, used on most non-Windows ...
Definition: CallingConv.h:159
llvm::MVT::getVectorElementType
MVT getVectorElementType() const
Definition: MachineValueType.h:519
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4637
llvm::MVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: MachineValueType.h:1076
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::CallingConv::CFGuard_Check
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall funtion.
Definition: CallingConv.h:87
llvm::DbgDeclareInst::getAddress
Value * getAddress() const
Definition: IntrinsicInst.h:311
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
llvm::AArch64CC::AL
@ AL
Definition: AArch64BaseInfo.h:269
llvm::ReturnInst
Return a value (possibly void), from a function.
Definition: Instructions.h:3017
llvm::CCValAssign::Full
@ Full
Definition: CallingConvLower.h:34
llvm::MVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: MachineValueType.h:350
llvm::X86Subtarget::hasSSE2
bool hasSSE2() const
Definition: X86Subtarget.h:200
llvm::CCValAssign::SExtUpper
@ SExtUpper
Definition: CallingConvLower.h:38
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
llvm::generic_gep_type_iterator
Definition: GetElementPtrTypeIterator.h:31
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:886
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
llvm::N86::EAX
@ EAX
Definition: X86MCTargetDesc.h:51
llvm::X86AddressMode
X86AddressMode - This struct holds a generalized full x86 address mode.
Definition: X86InstrBuilder.h:42
IntrinsicInst.h
X86Subtarget.h
llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition: CallingConvLower.h:189
llvm::ConstantExpr::getZExt
static Constant * getZExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2148
llvm::Function
Definition: Function.h:60
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:53
X86InstrBuilder.h
llvm::MachinePointerInfo::getConstantPool
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
Definition: MachineOperand.cpp:1000
X86SelectAddress
static void X86SelectAddress(const MachineInstr &I, const MachineRegisterInfo &MRI, X86AddressMode &AM)
Definition: X86InstructionSelector.cpp:474
GetElementPtrTypeIterator.h
llvm::ConstantExpr::getSExt
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2134
llvm::MemIntrinsicBase::getDestAddressSpace
unsigned getDestAddressSpace() const
Definition: IntrinsicInst.h:742
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::X86AddressMode::GV
const GlobalValue * GV
Definition: X86InstrBuilder.h:56
llvm::MVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: MachineValueType.h:366
llvm::CmpInst::FCMP_ONE
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:727
llvm::X86Subtarget
Definition: X86Subtarget.h:52
ErrorHandling.h
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::X86::COND_P
@ COND_P
Definition: X86BaseInfo.h:91
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:741
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:833
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:766
llvm::CCValAssign::VExt
@ VExt
Definition: CallingConvLower.h:46
llvm::CCValAssign::Indirect
@ Indirect
Definition: CallingConvLower.h:50
llvm::X86Subtarget::isTargetMCU
bool isTargetMCU() const
Definition: X86Subtarget.h:287
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:234
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::CallingConv::Win64
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
Definition: CallingConv.h:169
llvm::MachineInstr::getDesc
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:488
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::CallingConv::Tail
@ Tail
Tail - This calling convention attemps to make calls as fast as possible while guaranteeing that tail...
Definition: CallingConv.h:81
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:126
llvm::CallingConv::GHC
@ GHC
Definition: CallingConv.h:51
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::DbgVariableIntrinsic::getVariable
DILocalVariable * getVariable() const
Definition: IntrinsicInst.h:254
llvm::CCValAssign::BCvt
@ BCvt
Definition: CallingConvLower.h:44
llvm::CCValAssign::AExtUpper
@ AExtUpper
Definition: CallingConvLower.h:42
llvm::CallingConv::X86_ThisCall
@ X86_ThisCall
X86_ThisCall - Similar to X86_StdCall.
Definition: CallingConv.h:126
Operator.h
llvm::successors
auto successors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:29
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::CCValAssign::ZExtUpper
@ ZExtUpper
Definition: CallingConvLower.h:40
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::X86::CondCode
CondCode
Definition: X86BaseInfo.h:80
llvm::X86ISD::SMUL
@ SMUL
Definition: X86ISelLowering.h:399
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:268
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:123
llvm::MVT::v2f64
@ v2f64
Definition: MachineValueType.h:172
llvm::CCValAssign::Trunc
@ Trunc
Definition: CallingConvLower.h:45
llvm::X86II::MO_GOTOFF
@ MO_GOTOFF
MO_GOTOFF - On a symbol operand this indicates that the immediate is the offset to the location of th...
Definition: X86BaseInfo.h:434
llvm::LoadInst::getAlign
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:224
llvm::CmpInst::FCMP_OGT
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:723
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1618
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
OpIndex
unsigned OpIndex
Definition: SPIRVModuleAnalysis.cpp:41
llvm::MachineInstr::addMemOperand
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
Definition: MachineInstr.cpp:355
llvm::X86::COND_O
@ COND_O
Definition: X86BaseInfo.h:81
F
#define F(x, y, z)
Definition: MD5.cpp:55
MachineRegisterInfo.h
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:129
llvm::X86AddressMode::GVOpFlags
unsigned GVOpFlags
Definition: X86InstrBuilder.h:57
llvm::CmpInst::FCMP_ULT
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:733
llvm::CallingConv::HiPE
@ HiPE
Definition: CallingConv.h:55
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:186
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Type::isArrayTy
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:215
X86MachineFunctionInfo.h
llvm::TargetRegisterClass::contains
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
Definition: TargetRegisterInfo.h:94
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::CCValAssign::AExt
@ AExt
Definition: CallingConvLower.h:37
llvm::MemTransferBase::getSourceAddressSpace
unsigned getSourceAddressSpace() const
Definition: IntrinsicInst.h:807
llvm::X86II::MO_COFFSTUB
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition: X86BaseInfo.h:575
X86.h
llvm::MVT::v8f64
@ v8f64
Definition: MachineValueType.h:175
llvm::addConstantPoolReference
static const MachineInstrBuilder & addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI, unsigned GlobalBaseReg, unsigned char OpFlags)
addConstantPoolReference - This function is used to add a reference to the base of a constant value s...
Definition: X86InstrBuilder.h:223
llvm::addFullAddress
static const MachineInstrBuilder & addFullAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM)
Definition: X86InstrBuilder.h:172
llvm::X86AddressMode::Base
union llvm::X86AddressMode::@582 Base
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:31
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:763
llvm::ISD::ArgFlagsTy::isByVal
bool isByVal() const
Definition: TargetCallingConv.h:85
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:76
FunctionLoweringInfo.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition: CallingConvLower.h:148
llvm::User
Definition: User.h:44
llvm::addDirectMem
static const MachineInstrBuilder & addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg)
addDirectMem - This function is used to add a direct memory reference to the current instruction – th...
Definition: X86InstrBuilder.h:124
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:56
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
llvm::X86II::MO_GOTPCREL
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:442
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
llvm::X86AddressMode::Scale
unsigned Scale
Definition: X86InstrBuilder.h:53
llvm::MVT::x86mmx
@ x86mmx
Definition: MachineValueType.h:260
MCSymbol.h
llvm::MemTransferBase::getRawSource
Value * getRawSource() const
Return the arguments to the instruction.
Definition: IntrinsicInst.h:794
llvm::CmpInst::FCMP_UGE
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:732
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:321
llvm::RetCC_X86
bool RetCC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
llvm::addFrameReference
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
Definition: PPCInstrBuilder.h:32
llvm::Mips::GPRIdx
@ GPRIdx
Definition: MipsRegisterBankInfo.cpp:44
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:127
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::X86Subtarget::hasSSE1
bool hasSSE1() const
Definition: X86Subtarget.h:199
llvm::X86AddressMode::BaseType
enum llvm::X86AddressMode::@581 BaseType
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:28
llvm::ISD::NodeType
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
llvm::X86::AddrIndexReg
@ AddrIndexReg
Definition: X86BaseInfo.h:34
llvm::MVT::v4f64
@ v4f64
Definition: MachineValueType.h:174
llvm::CmpInst::FCMP_UNO
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:729
llvm::X86AddressMode::FrameIndexBase
@ FrameIndexBase
Definition: X86InstrBuilder.h:45
llvm::Instruction
Definition: Instruction.h:42
llvm::MCID::Flag
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:147
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::X86II::MO_GOTPCREL_NORELAX
@ MO_GOTPCREL_NORELAX
MO_GOTPCREL_NORELAX - Same as MO_GOTPCREL except that R_X86_64_GOTPCREL relocations are guaranteed to...
Definition: X86BaseInfo.h:447
llvm::DILocalVariable::isValidLocationForIntrinsic
bool isValidLocationForIntrinsic(const DILocation *DL) const
Check that a location is valid for this variable.
Definition: DebugInfoMetadata.h:3127
llvm::CmpInst::FCMP_OEQ
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:722
llvm::CmpInst::FCMP_OLT
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:725
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:666
llvm::CCValAssign::getLocInfo
LocInfo getLocInfo() const
Definition: CallingConvLower.h:153
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::X86AddressMode::Reg
unsigned Reg
Definition: X86InstrBuilder.h:49
llvm::CCValAssign::getLocMemOffset
unsigned getLocMemOffset() const
Definition: CallingConvLower.h:149
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::EVT::bitsLT
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:272
llvm::CCValAssign::isRegLoc
bool isRegLoc() const
Definition: CallingConvLower.h:143
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::Triple::isOSMSVCRT
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:600
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::MemSetInst
This class wraps the llvm.memset intrinsic.
Definition: IntrinsicInst.h:993
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3180
llvm::CmpInst::FCMP_FALSE
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:721
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:642
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::CodeModel::Model
Model
Definition: CodeGen.h:28
llvm::CmpInst
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:709
llvm::MVT::f80
@ f80
Definition: MachineValueType.h:57
X86ChooseCmpOpcode
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget)
Definition: X86FastISel.cpp:1354
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:191
llvm::RegState::Implicit
@ Implicit
Not emitted register (e.g. carry, or temporary result).
Definition: MachineInstrBuilder.h:46
llvm::isInt< 8 >
constexpr bool isInt< 8 >(int64_t x)
Definition: MathExtras.h:367
llvm::MVT::v4i64
@ v4i64
Definition: MachineValueType.h:120
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:640
llvm::MVT::v32i16
@ v32i16
Definition: MachineValueType.h:94
llvm::X86AddressMode::IndexReg
unsigned IndexReg
Definition: X86InstrBuilder.h:54
llvm::SPIRV::Decoration::Alignment
@ Alignment
llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:305
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::CallingConv::Fast
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:80
BranchProbabilityInfo.h
llvm::isInt< 32 >
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:373
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:340
llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:180
llvm::CCValAssign::SExt
@ SExt
Definition: CallingConvLower.h:35
llvm::isUInt< 16 >
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:408
llvm::MVT::v16i16
@ v16i16
Definition: MachineValueType.h:93
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:320
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::MVT::v2i64
@ v2i64
Definition: MachineValueType.h:118
llvm::isGlobalStubReference
static bool isGlobalStubReference(unsigned char TargetFlag)
isGlobalStubReference - Return true if the specified TargetFlag operand is a reference to a stub for ...
Definition: X86InstrInfo.h:82
llvm::StructLayout
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:622
uint64_t
llvm::N86::EDI
@ EDI
Definition: X86MCTargetDesc.h:51
llvm::CallingConv::X86_StdCall
@ X86_StdCall
X86_StdCall - stdcall is the calling conventions mostly used by the Win32 API.
Definition: CallingConv.h:102
llvm::N86::EDX
@ EDX
Definition: X86MCTargetDesc.h:51
llvm::DbgDeclareInst
This represents the llvm.dbg.declare instruction.
Definition: IntrinsicInst.h:309
llvm::X86II::MO_PIC_BASE_OFFSET
@ MO_PIC_BASE_OFFSET
MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the immediate should get the value of th...
Definition: X86BaseInfo.h:420
llvm::MVT::v16f32
@ v16f32
Definition: MachineValueType.h:162
llvm::TruncInst
This class represents a truncation of integer types.
Definition: Instructions.h:4780
AH
AMD64 Optimization Manual has some nice information about optimizing integer multiplication by a constant How much of it applies to Intel s X86 implementation There are definite trade offs to xmm0 cvttss2siq rdx jb L3 subss xmm0 rax cvttss2siq rdx xorq rdx rax ret instead of xmm1 cvttss2siq rcx movaps xmm2 subss xmm2 cvttss2siq rax rdx xorq rax ucomiss xmm0 cmovb rax ret Seems like the jb branch has high likelihood of being taken It would have saved a few instructions It s not possible to reference AH
Definition: README-X86-64.txt:44
llvm::ISD::TRAP
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1126
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:716
llvm::codeview::FrameCookieKind::Copy
@ Copy
DebugInfo.h
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
llvm::X86RegisterInfo::getPtrSizedFrameRegister
unsigned getPtrSizedFrameRegister(const MachineFunction &MF) const
Definition: X86RegisterInfo.cpp:916
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:929
MachineConstantPool.h
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::X86ISD::UMUL
@ UMUL
Definition: X86ISelLowering.h:400
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:157
llvm::isUInt< 32 >
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:411
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:44
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::FunctionLoweringInfo
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Definition: FunctionLoweringInfo.h:52
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:864
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:656
llvm::X86::COND_B
@ COND_B
Definition: X86BaseInfo.h:83
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::CmpInst::FCMP_OGE
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:724
llvm::X86AddressMode::Disp
int Disp
Definition: X86InstrBuilder.h:55
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1404
llvm::X86TargetMachine
Definition: X86TargetMachine.h:28
llvm::X86MachineFunctionInfo
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
Definition: X86MachineFunctionInfo.h:25
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::FastISel
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:65
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::X86InstrInfo::foldMemoryOperandImpl
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
foldMemoryOperand - If this target supports it, fold a load or store of the specified stack slot into...
Definition: X86InstrInfo.cpp:6202
llvm::MachineFunction
Definition: MachineFunction.h:241
llvm::X86InstrInfo
Definition: X86InstrInfo.h:138
TargetOptions.h
llvm::CCValAssign::getValNo
unsigned getValNo() const
Definition: CallingConvLower.h:140
llvm::MachineInstrBuilder::addRegMask
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
Definition: MachineInstrBuilder.h:197
llvm::TargetMachine::getMCAsmInfo
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
Definition: TargetMachine.h:205
llvm::X86II::MO_DLLIMPORT
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the reference is actually to the "__imp...
Definition: X86BaseInfo.h:537
llvm::X86::isCalleePop
bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO)
Determines whether the callee is required to pop its own arguments.
Definition: X86ISelLowering.cpp:5242
llvm::N86::ESI
@ ESI
Definition: X86MCTargetDesc.h:51
MCAsmInfo.h
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1612
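As a brief illustration (a sketch only; the definesAnyRegister helper is hypothetical), the range-based wrapper lets a pass ask a question about all operands of a MachineInstr without spelling out begin()/end():
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"

static bool definesAnyRegister(const llvm::MachineInstr &MI) {
  // True if at least one operand is a register definition.
  return llvm::any_of(MI.operands(), [](const llvm::MachineOperand &MO) {
    return MO.isReg() && MO.isDef();
  });
}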
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:47
llvm::MachineFrameInfo::CreateStackObject
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Definition: MachineFrameInfo.cpp:51
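A hedged sketch of typical use: carving out an 8-byte, 8-byte-aligned spill slot in the current function's frame, much as a lowering routine does when it needs a temporary stack location (the createSpillSlot helper is illustrative, not part of the API):
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/Alignment.h"

static int createSpillSlot(llvm::MachineFunction &MF) {
  llvm::MachineFrameInfo &MFI = MF.getFrameInfo();
  // Returns a nonnegative frame index that later code can address,
  // e.g. through X86AddressMode::FrameIndex.
  return MFI.CreateStackObject(/*Size=*/8, llvm::Align(8),
                               /*isSpillSlot=*/true);
}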
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:178
llvm::isGlobalRelativeToPICBase
static bool isGlobalRelativeToPICBase(unsigned char TargetFlag)
isGlobalRelativeToPICBase - Return true if the specified global value reference is relative to a 32-b...
Definition: X86InstrInfo.h:100
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::X86MachineFunctionInfo::getBytesToPopOnReturn
unsigned getBytesToPopOnReturn() const
Definition: X86MachineFunctionInfo.h:155
llvm::Reloc::Static
@ Static
Definition: CodeGen.h:22
llvm::CallingConv::WebKit_JS
@ WebKit_JS
Definition: CallingConv.h:58
llvm::CallingConv::C
@ C
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
uint32_t
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
llvm::ISD::ArgFlagsTy
Definition: TargetCallingConv.h:27
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::MVT::v64i8
@ v64i8
Definition: MachineValueType.h:82
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:374
llvm::X86::createFastISel
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
Definition: X86FastISel.cpp:4005
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:148
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:103
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:280
llvm::CCValAssign::FPExt
@ FPExt
Definition: CallingConvLower.h:49
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:176
llvm::CallBase::paramHasAttr
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
Definition: Instructions.cpp:341
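For illustration only (the firstArgIsSRet helper is hypothetical), this is how a call-lowering routine might test an argument attribute at a call site:
#include "llvm/IR/Attributes.h"
#include "llvm/IR/InstrTypes.h"

static bool firstArgIsSRet(const llvm::CallBase &CB) {
  // Checks the first call-site argument for the 'sret' attribute.
  return CB.arg_size() > 0 &&
         CB.paramHasAttr(0, llvm::Attribute::StructRet);
}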
llvm::MVT::v8i64
@ v8i64
Definition: MachineValueType.h:121
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition: MachineMemOperand.h:133
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:108
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:164
CallingConv.h
llvm::Instruction::isAtomic
bool isAtomic() const
Return true if this instruction has an AtomicOrdering of unordered or higher.
Definition: Instruction.cpp:616
llvm::CmpInst::FCMP_UGT
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:731
llvm::X86Subtarget::hasAVX512
bool hasAVX512() const
Definition: X86Subtarget.h:207
llvm::CCValAssign::isMemLoc
bool isMemLoc() const
Definition: CallingConvLower.h:144
llvm::ISD::ArgFlagsTy::getByValSize
unsigned getByValSize() const
Definition: TargetCallingConv.h:169
X86ChooseCmpImmediateOpcode
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC)
If the right-hand side of a comparison is the constant RHSC, return an opcode that works for the co...
Definition: X86FastISel.cpp:1381
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:971
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:268
X86CallingConv.h
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:350
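A minimal sketch: the same entry point produces the zero value for any first-class type, so an i32 zero, a null pointer, or an all-zero aggregate all come from one call (zeroOfI32 is an illustrative helper):
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

static llvm::Constant *zeroOfI32(llvm::LLVMContext &Ctx) {
  // For i32 this yields a ConstantInt 0; for a pointer type it would yield
  // a null pointer constant, and for an aggregate a zeroinitializer.
  return llvm::Constant::getNullValue(llvm::Type::getInt32Ty(Ctx));
}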
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:148
llvm::TargetRegisterInfo::getRegSizeInBits
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
Definition: TargetRegisterInfo.h:277
llvm::MemIntrinsicBase::getLength
Value * getLength() const
Definition: IntrinsicInst.h:731
uint16_t
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:636
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:341
X86TargetMachine.h
llvm::StructLayout::getElementOffset
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:652
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::N86::EBX
@ EBX
Definition: X86MCTargetDesc.h:51
llvm::CC_X86
bool CC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::CallBase::arg_size
unsigned arg_size() const
Definition: InstrTypes.h:1339
GlobalVariable.h
llvm::MachineInstrBuilder::addConstantPoolIndex
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:158
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:92
llvm::pdb::PDB_LocType::Slot
@ Slot
llvm::ISD::ArgFlagsTy::isSExt
bool isSExt() const
Definition: TargetCallingConv.h:76
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:46
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::CallBase::arg_empty
bool arg_empty() const
Definition: InstrTypes.h:1338
llvm::X86Subtarget::hasAVX
bool hasAVX() const
Definition: X86Subtarget.h:205
llvm::X86::getCMovOpcode
unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand=false)
Return a cmov opcode for the given register size in bytes, and operand type.
Definition: X86InstrInfo.cpp:2802
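A hedged sketch (X86-backend-internal code, where X86InstrInfo.h is visible): selecting the register form of CMOVcc for a 4-byte value; passing true for the second argument would instead request the memory-operand (load-folding) form:
#include "X86InstrInfo.h"

static unsigned cmov32RegOpcode() {
  // 4-byte operands, register-register form.
  return llvm::X86::getCMovOpcode(/*RegBytes=*/4, /*HasMemoryOperand=*/false);
}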
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::MachineFrameInfo::setStackProtectorIndex
void setStackProtectorIndex(int I)
Definition: MachineFrameInfo.h:358
llvm::MVT::v8i32
@ v8i32
Definition: MachineValueType.h:107
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition: MachineMemOperand.h:135
GlobalAlias.h
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
X86RegisterInfo.h
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
llvm::ARMBuildAttrs::Symbol
@ Symbol
Definition: ARMBuildAttributes.h:83
llvm::X86AddressMode::FrameIndex
int FrameIndex
Definition: X86InstrBuilder.h:50
llvm::X86AddressMode::RegBase
@ RegBase
Definition: X86InstrBuilder.h:44
llvm::MachineInstrBuilder::addGlobalAddress
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:177
llvm::CodeModel::Large
@ Large
Definition: CodeGen.h:28
llvm::getKillRegState
unsigned getKillRegState(bool B)
Definition: MachineInstrBuilder.h:508
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:105
FastISel.h
llvm::X86Subtarget::getTargetTriple
const Triple & getTargetTriple() const
Definition: X86Subtarget.h:268
Instructions.h
llvm::MemCpyInst
This class wraps the llvm.memcpy intrinsic.
Definition: IntrinsicInst.h:1024
llvm::MachineInstrBuilder::addSym
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
Definition: MachineInstrBuilder.h:267
llvm::FunctionLoweringInfo::MF
MachineFunction * MF
Definition: FunctionLoweringInfo.h:55
llvm::Type::isStructTy
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:212
llvm::GetReturnInfo
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
Definition: TargetLoweringBase.cpp:1614
llvm::MemIntrinsic::isVolatile
bool isVolatile() const
Definition: IntrinsicInst.h:971
llvm::X86AddressMode::getFullAddress
void getFullAddress(SmallVectorImpl< MachineOperand > &MO)
Definition: X86InstrBuilder.h:65
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:367
llvm::CallingConv::SwiftTail
@ SwiftTail
SwiftTail - This follows the Swift calling convention in how arguments are passed but guarantees tail...
Definition: CallingConv.h:92
llvm::X86::getX86ConditionCode
std::pair< CondCode, bool > getX86ConditionCode(CmpInst::Predicate Predicate)
Return a pair of condition code for the given predicate and whether the instruction operands should b...
Definition: X86InstrInfo.cpp:2764
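A short, hedged sketch (again X86-backend-internal): mapping an IR predicate to an X86 condition code, where the bool half of the returned pair reports whether the compare's operands must be commuted to use that code (condCodeForPredicate is an illustrative helper):
#include "X86InstrInfo.h"
#include "llvm/IR/InstrTypes.h"
#include <utility>

static llvm::X86::CondCode
condCodeForPredicate(llvm::CmpInst::Predicate Pred, bool &NeedSwap) {
  std::pair<llvm::X86::CondCode, bool> CCAndSwap =
      llvm::X86::getX86ConditionCode(Pred);
  // When true, the comparison operands must be swapped before emitting
  // the compare that feeds this condition code.
  NeedSwap = CCAndSwap.second;
  return CCAndSwap.first;
}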
llvm::User::op_begin
op_iterator op_begin()
Definition: User.h:234
getX86SSEConditionCode
static std::pair< unsigned, bool > getX86SSEConditionCode(CmpInst::Predicate Predicate)
Definition: X86FastISel.cpp:176
llvm::CCValAssign::getValVT
MVT getValVT() const
Definition: CallingConvLower.h:141
llvm::MVT::f16
@ f16
Definition: MachineValueType.h:54
llvm::CmpInst::FCMP_UNE
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:735
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
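A hedged sketch of the builder interface in use (target-agnostic; the emitCopy helper and its parameters are illustrative): the DestReg overload adds the definition operand, and chained addReg() calls append use operands, which is also where flags such as getKillRegState() would go:
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"

static void emitCopy(llvm::MachineBasicBlock &MBB,
                     llvm::MachineBasicBlock::iterator InsertPt,
                     const llvm::DebugLoc &DL,
                     const llvm::TargetInstrInfo &TII,
                     llvm::Register Dst, llvm::Register Src) {
  // Builds "Dst = COPY Src" before InsertPt; Dst is the def, Src a use.
  llvm::BuildMI(MBB, InsertPt, DL, TII.get(llvm::TargetOpcode::COPY), Dst)
      .addReg(Src);
}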
computeBytesPoppedByCalleeForSRet
static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget, CallingConv::ID CC, const CallBase *CB)
Definition: X86FastISel.cpp:3136
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:53
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:494
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::N86::ECX
@ ECX
Definition: X86MCTargetDesc.h:51
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:809
llvm::constrainOperandRegClass
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition: Utils.cpp:52
llvm::MemIntrinsicBase::getRawDest
Value * getRawDest() const
Definition: IntrinsicInst.h:725
llvm::orc::SymbolState::Emitted
@ Emitted
Assigned address, still materializing.
llvm::MachineInstrBuilder::addMetadata
const MachineInstrBuilder & addMetadata(const MDNode *MD) const
Definition: MachineInstrBuilder.h:236
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:160
llvm::CmpInst::FCMP_OLE
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:726
llvm::ISD::ArgFlagsTy::isInReg
bool isInReg() const
Definition: TargetCallingConv.h:79
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1174
DerivedTypes.h
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::MVT::v32i8
@ v32i8
Definition: MachineValueType.h:81
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:256
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:45
llvm::X86RegisterInfo::getStackRegister
Register getStackRegister() const
Definition: X86RegisterInfo.h:147
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::MachineFrameInfo::setFrameAddressIsTaken
void setFrameAddressIsTaken(bool T)
Definition: MachineFrameInfo.h:371
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:58
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::MachineMemOperand::isNonTemporal
bool isNonTemporal() const
Definition: MachineMemOperand.h:289
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3099
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:48
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:760
llvm::CallingConv::Swift
@ Swift
Definition: CallingConv.h:73
llvm::X86MachineFunctionInfo::getSRetReturnReg
Register getSRetReturnReg() const
Definition: X86MachineFunctionInfo.h:167
llvm::MVT::v8f32
@ v8f32
Definition: MachineValueType.h:161
X86InstrInfo.h
llvm::CmpInst::FCMP_TRUE
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:736
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::X86::COND_E
@ COND_E
Definition: X86BaseInfo.h:85
llvm::MCAsmInfo::usesWindowsCFI
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:793
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:55
llvm::MCInstrDesc::getNumOperands
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:230
llvm::X86::LAST_VALID_COND
@ LAST_VALID_COND
Definition: X86BaseInfo.h:97
llvm::MachinePointerInfo::getStack
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
Definition: MachineOperand.cpp:1019
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::X86::COND_NP
@ COND_NP
Definition: X86BaseInfo.h:92
llvm::TargetRegisterInfo::getCallPreservedMask
virtual const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const
Return a mask of call-preserved registers for the given calling convention on the current function.
Definition: TargetRegisterInfo.h:480
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:606
llvm::CmpInst::FCMP_ORD
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:728
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3192
llvm::BasicBlock::const_iterator
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:88
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::CmpInst::FCMP_UEQ
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:730
llvm::SPIRV::MemoryOperand::Aligned
@ Aligned
llvm::X86::COND_NE
@ COND_NE
Definition: X86BaseInfo.h:86
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::MCInstrDesc::ImplicitDefs
const MCPhysReg * ImplicitDefs
Definition: MCInstrDesc.h:207
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:288
llvm::X86RegisterInfo
Definition: X86RegisterInfo.h:24
llvm::User::op_end
op_iterator op_end()
Definition: User.h:236