LLVM  14.0.0git
X86FastISel.cpp
1 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the X86-specific support for the FastISel class. Much
10 // of the target-specific code is generated by tablegen in the file
11 // X86GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86.h"
16 #include "X86CallingConv.h"
17 #include "X86InstrBuilder.h"
18 #include "X86InstrInfo.h"
19 #include "X86MachineFunctionInfo.h"
20 #include "X86RegisterInfo.h"
21 #include "X86Subtarget.h"
22 #include "X86TargetMachine.h"
23 #include "llvm/Analysis/BranchProbabilityInfo.h"
24 #include "llvm/CodeGen/FastISel.h"
25 #include "llvm/CodeGen/FunctionLoweringInfo.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFrameInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/IR/CallingConv.h"
30 #include "llvm/IR/DebugInfo.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/GetElementPtrTypeIterator.h"
33 #include "llvm/IR/GlobalAlias.h"
34 #include "llvm/IR/GlobalVariable.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/IntrinsicsX86.h"
38 #include "llvm/IR/Operator.h"
39 #include "llvm/MC/MCAsmInfo.h"
40 #include "llvm/MC/MCSymbol.h"
41 #include "llvm/Target/TargetOptions.h"
42 
43 using namespace llvm;
44 
45 namespace {
46 
47 class X86FastISel final : public FastISel {
48  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
49  /// make the right decision when generating code for different targets.
50  const X86Subtarget *Subtarget;
51 
52  /// X86ScalarSSEf32, X86ScalarSSEf64, X86ScalarSSEf16 - Select between SSE
53  /// or x87 floating point ops.
54  /// When SSE is available, use it for f32 operations; when SSE2 is available,
55  /// use it for f64 operations; when AVX512-FP16 is available, use it for f16.
56  bool X86ScalarSSEf64;
57  bool X86ScalarSSEf32;
58  bool X86ScalarSSEf16;
59 
60 public:
61  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
62  const TargetLibraryInfo *libInfo)
63  : FastISel(funcInfo, libInfo) {
64  Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
65  X86ScalarSSEf64 = Subtarget->hasSSE2();
66  X86ScalarSSEf32 = Subtarget->hasSSE1();
67  X86ScalarSSEf16 = Subtarget->hasFP16();
68  }
69 
70  bool fastSelectInstruction(const Instruction *I) override;
71 
72  /// The specified machine instr operand is a vreg, and that
73  /// vreg is being provided by the specified load instruction. If possible,
74  /// try to fold the load as an operand to the instruction, returning true on
75  /// success.
76  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
77  const LoadInst *LI) override;
78 
79  bool fastLowerArguments() override;
80  bool fastLowerCall(CallLoweringInfo &CLI) override;
81  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
82 
83 #include "X86GenFastISel.inc"
84 
85 private:
86  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
87  const DebugLoc &DL);
88 
89  bool X86FastEmitLoad(MVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
90  unsigned &ResultReg, unsigned Alignment = 1);
91 
92  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
93  MachineMemOperand *MMO = nullptr, bool Aligned = false);
94  bool X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM,
95  MachineMemOperand *MMO = nullptr, bool Aligned = false);
96 
97  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
98  unsigned &ResultReg);
99 
100  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
101  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
102 
103  bool X86SelectLoad(const Instruction *I);
104 
105  bool X86SelectStore(const Instruction *I);
106 
107  bool X86SelectRet(const Instruction *I);
108 
109  bool X86SelectCmp(const Instruction *I);
110 
111  bool X86SelectZExt(const Instruction *I);
112 
113  bool X86SelectSExt(const Instruction *I);
114 
115  bool X86SelectBranch(const Instruction *I);
116 
117  bool X86SelectShift(const Instruction *I);
118 
119  bool X86SelectDivRem(const Instruction *I);
120 
121  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
122 
123  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
124 
125  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
126 
127  bool X86SelectSelect(const Instruction *I);
128 
129  bool X86SelectTrunc(const Instruction *I);
130 
131  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
132  const TargetRegisterClass *RC);
133 
134  bool X86SelectFPExt(const Instruction *I);
135  bool X86SelectFPTrunc(const Instruction *I);
136  bool X86SelectSIToFP(const Instruction *I);
137  bool X86SelectUIToFP(const Instruction *I);
138  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);
139 
140  const X86InstrInfo *getInstrInfo() const {
141  return Subtarget->getInstrInfo();
142  }
143  const X86TargetMachine *getTargetMachine() const {
144  return static_cast<const X86TargetMachine *>(&TM);
145  }
146 
147  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
148 
149  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
150  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
151  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
152  unsigned fastMaterializeConstant(const Constant *C) override;
153 
154  unsigned fastMaterializeAlloca(const AllocaInst *C) override;
155 
156  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
157 
158  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
159  /// computed in an SSE register, not on the X87 floating point stack.
160  bool isScalarFPTypeInSSEReg(EVT VT) const {
161  return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is legal when SSE2 is available
162  (VT == MVT::f32 && X86ScalarSSEf32) || // f32 is legal when SSE1 is available
163  (VT == MVT::f16 && X86ScalarSSEf16); // f16 is legal when AVX512-FP16 is available
164  }
165 
166  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
167 
168  bool IsMemcpySmall(uint64_t Len);
169 
170  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
171  X86AddressMode SrcAM, uint64_t Len);
172 
173  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
174  const Value *Cond);
175 
176  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
177  X86AddressMode &AM);
178 
179  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
180  const TargetRegisterClass *RC, unsigned Op0,
181  unsigned Op1, unsigned Op2, unsigned Op3);
182 };
183 
184 } // end anonymous namespace.
185 
186 static std::pair<unsigned, bool>
187 getX86SSEConditionCode(CmpInst::Predicate Predicate) {
188  unsigned CC;
189  bool NeedSwap = false;
190 
191  // SSE Condition code mapping:
192  // 0 - EQ
193  // 1 - LT
194  // 2 - LE
195  // 3 - UNORD
196  // 4 - NEQ
197  // 5 - NLT
198  // 6 - NLE
199  // 7 - ORD
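  // (Illustrative note: predicates with no direct SSE encoding are handled by
  // swapping the operands; e.g. FCMP_OGT below is emitted as a swapped
  // compare using CC 1 (LT).)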
200  switch (Predicate) {
201  default: llvm_unreachable("Unexpected predicate");
202  case CmpInst::FCMP_OEQ: CC = 0; break;
203  case CmpInst::FCMP_OGT: NeedSwap = true; LLVM_FALLTHROUGH;
204  case CmpInst::FCMP_OLT: CC = 1; break;
205  case CmpInst::FCMP_OGE: NeedSwap = true; LLVM_FALLTHROUGH;
206  case CmpInst::FCMP_OLE: CC = 2; break;
207  case CmpInst::FCMP_UNO: CC = 3; break;
208  case CmpInst::FCMP_UNE: CC = 4; break;
209  case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH;
210  case CmpInst::FCMP_UGE: CC = 5; break;
211  case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
212  case CmpInst::FCMP_UGT: CC = 6; break;
213  case CmpInst::FCMP_ORD: CC = 7; break;
214  case CmpInst::FCMP_UEQ: CC = 8; break;
215  case CmpInst::FCMP_ONE: CC = 12; break;
216  }
217 
218  return std::make_pair(CC, NeedSwap);
219 }
220 
221 /// Adds a complex addressing mode to the given machine instr builder.
222 /// Note, this will constrain the index register. If it is not possible to
223 /// constrain the given index register, then a new one will be created. The
224 /// IndexReg field of the addressing mode will be updated to match in this case.
225 const MachineInstrBuilder &
226 X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
227  X86AddressMode &AM) {
228  // First constrain the index register. It needs to be a GR64_NOSP.
229  AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
230  MIB->getNumOperands() +
231  X86::AddrIndexReg);
232  return ::addFullAddress(MIB, AM);
233 }
234 
235 /// Check if it is possible to fold the condition from the XALU intrinsic
236 /// into the user. The condition code will only be updated on success.
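/// For illustration, the IR pattern being matched is roughly:
///   %res  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %obit = extractvalue { i32, i1 } %res, 1
///   br i1 %obit, label %overflow, label %continue
/// so that the overflow bit can be consumed directly as an EFLAGS condition
/// (COND_O / COND_B) instead of being materialized into a register.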
237 bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
238  const Value *Cond) {
239  if (!isa<ExtractValueInst>(Cond))
240  return false;
241 
242  const auto *EV = cast<ExtractValueInst>(Cond);
243  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
244  return false;
245 
246  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
247  MVT RetVT;
248  const Function *Callee = II->getCalledFunction();
249  Type *RetTy =
250  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
251  if (!isTypeLegal(RetTy, RetVT))
252  return false;
253 
254  if (RetVT != MVT::i32 && RetVT != MVT::i64)
255  return false;
256 
257  X86::CondCode TmpCC;
258  switch (II->getIntrinsicID()) {
259  default: return false;
260  case Intrinsic::sadd_with_overflow:
261  case Intrinsic::ssub_with_overflow:
262  case Intrinsic::smul_with_overflow:
263  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
264  case Intrinsic::uadd_with_overflow:
265  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
266  }
267 
268  // Check if both instructions are in the same basic block.
269  if (II->getParent() != I->getParent())
270  return false;
271 
272  // Make sure nothing is in the way
273  BasicBlock::const_iterator Start(I);
274  BasicBlock::const_iterator End(II);
275  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
276  // We only expect extractvalue instructions between the intrinsic and the
277  // instruction to be selected.
278  if (!isa<ExtractValueInst>(Itr))
279  return false;
280 
281  // Check that the extractvalue operand comes from the intrinsic.
282  const auto *EVI = cast<ExtractValueInst>(Itr);
283  if (EVI->getAggregateOperand() != II)
284  return false;
285  }
286 
287  // Make sure no potentially eflags clobbering phi moves can be inserted in
288  // between.
289  auto HasPhis = [](const BasicBlock *Succ) {
290  return !llvm::empty(Succ->phis());
291  };
292  if (I->isTerminator() && llvm::any_of(successors(I), HasPhis))
293  return false;
294 
295  CC = TmpCC;
296  return true;
297 }
298 
299 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
300  EVT evt = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
301  if (evt == MVT::Other || !evt.isSimple())
302  // Unhandled type. Halt "fast" selection and bail.
303  return false;
304 
305  VT = evt.getSimpleVT();
306  // For now, require SSE/SSE2 for performing floating-point operations,
307  // since x87 requires additional work.
308  if (VT == MVT::f64 && !X86ScalarSSEf64)
309  return false;
310  if (VT == MVT::f32 && !X86ScalarSSEf32)
311  return false;
312  // Similarly, no f80 support yet.
313  if (VT == MVT::f80)
314  return false;
315  // We only handle legal types. For example, on x86-32 the instruction
316  // selector contains all of the 64-bit instructions from x86-64,
317  // under the assumption that i64 won't be used if the target doesn't
318  // support it.
319  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
320 }
321 
322 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
323 /// The address is given by the pre-computed addressing mode AM.
324 /// Return true and pass back the result register in ResultReg if possible.
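/// (As a rough example of the opcode choice below: a 16-byte-aligned
/// non-temporal v4f32 load selects MOVNTDQA/VMOVNTDQA when SSE4.1 is
/// available, while an unaligned one falls back to MOVUPS/VMOVUPS.)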
325 bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM,
326  MachineMemOperand *MMO, unsigned &ResultReg,
327  unsigned Alignment) {
328  bool HasSSE41 = Subtarget->hasSSE41();
329  bool HasAVX = Subtarget->hasAVX();
330  bool HasAVX2 = Subtarget->hasAVX2();
331  bool HasAVX512 = Subtarget->hasAVX512();
332  bool HasVLX = Subtarget->hasVLX();
333  bool IsNonTemporal = MMO && MMO->isNonTemporal();
334 
335  // Treat i1 loads the same as i8 loads. Masking will be done when storing.
336  if (VT == MVT::i1)
337  VT = MVT::i8;
338 
339  // Get opcode and regclass of the output for the given load instruction.
340  unsigned Opc = 0;
341  switch (VT.SimpleTy) {
342  default: return false;
343  case MVT::i8:
344  Opc = X86::MOV8rm;
345  break;
346  case MVT::i16:
347  Opc = X86::MOV16rm;
348  break;
349  case MVT::i32:
350  Opc = X86::MOV32rm;
351  break;
352  case MVT::i64:
353  // Must be in x86-64 mode.
354  Opc = X86::MOV64rm;
355  break;
356  case MVT::f32:
357  if (X86ScalarSSEf32)
358  Opc = HasAVX512 ? X86::VMOVSSZrm_alt :
359  HasAVX ? X86::VMOVSSrm_alt :
360  X86::MOVSSrm_alt;
361  else
362  Opc = X86::LD_Fp32m;
363  break;
364  case MVT::f64:
365  if (X86ScalarSSEf64)
366  Opc = HasAVX512 ? X86::VMOVSDZrm_alt :
367  HasAVX ? X86::VMOVSDrm_alt :
368  X86::MOVSDrm_alt;
369  else
370  Opc = X86::LD_Fp64m;
371  break;
372  case MVT::f80:
373  // No f80 support yet.
374  return false;
375  case MVT::v4f32:
376  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
377  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
378  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
379  else if (Alignment >= 16)
380  Opc = HasVLX ? X86::VMOVAPSZ128rm :
381  HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
382  else
383  Opc = HasVLX ? X86::VMOVUPSZ128rm :
384  HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
385  break;
386  case MVT::v2f64:
387  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
388  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
389  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
390  else if (Alignment >= 16)
391  Opc = HasVLX ? X86::VMOVAPDZ128rm :
392  HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
393  else
394  Opc = HasVLX ? X86::VMOVUPDZ128rm :
395  HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
396  break;
397  case MVT::v4i32:
398  case MVT::v2i64:
399  case MVT::v8i16:
400  case MVT::v16i8:
401  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
402  Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
403  HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
404  else if (Alignment >= 16)
405  Opc = HasVLX ? X86::VMOVDQA64Z128rm :
406  HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
407  else
408  Opc = HasVLX ? X86::VMOVDQU64Z128rm :
409  HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
410  break;
411  case MVT::v8f32:
412  assert(HasAVX);
413  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
414  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
415  else if (IsNonTemporal && Alignment >= 16)
416  return false; // Force split for X86::VMOVNTDQArm
417  else if (Alignment >= 32)
418  Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
419  else
420  Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
421  break;
422  case MVT::v4f64:
423  assert(HasAVX);
424  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
425  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
426  else if (IsNonTemporal && Alignment >= 16)
427  return false; // Force split for X86::VMOVNTDQArm
428  else if (Alignment >= 32)
429  Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
430  else
431  Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
432  break;
433  case MVT::v8i32:
434  case MVT::v4i64:
435  case MVT::v16i16:
436  case MVT::v32i8:
437  assert(HasAVX);
438  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
439  Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
440  else if (IsNonTemporal && Alignment >= 16)
441  return false; // Force split for X86::VMOVNTDQArm
442  else if (Alignment >= 32)
443  Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
444  else
445  Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
446  break;
447  case MVT::v16f32:
448  assert(HasAVX512);
449  if (IsNonTemporal && Alignment >= 64)
450  Opc = X86::VMOVNTDQAZrm;
451  else
452  Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
453  break;
454  case MVT::v8f64:
455  assert(HasAVX512);
456  if (IsNonTemporal && Alignment >= 64)
457  Opc = X86::VMOVNTDQAZrm;
458  else
459  Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
460  break;
461  case MVT::v8i64:
462  case MVT::v16i32:
463  case MVT::v32i16:
464  case MVT::v64i8:
465  assert(HasAVX512);
466  // Note: There are a lot more choices based on type with AVX-512, but
467  // there's really no advantage when the load isn't masked.
468  if (IsNonTemporal && Alignment >= 64)
469  Opc = X86::VMOVNTDQAZrm;
470  else
471  Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
472  break;
473  }
474 
475  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
476 
477  ResultReg = createResultReg(RC);
478  MachineInstrBuilder MIB =
479  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
480  addFullAddress(MIB, AM);
481  if (MMO)
482  MIB->addMemOperand(*FuncInfo.MF, MMO);
483  return true;
484 }
485 
486 /// X86FastEmitStore - Emit a machine instruction to store a value Val of
487 /// type VT. The address is given by the pre-computed addressing mode AM,
488 /// which may consist of a base register, displacement, scaled index, and/or
489 /// a global address. Return true if it is possible.
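/// (For example, an i1 store below is first masked with AND8ri to clear the
/// upper bits and then stored as an i8 with MOV8mr; aligned non-temporal
/// vector stores pick the MOVNT* opcodes.)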
490 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM,
491  MachineMemOperand *MMO, bool Aligned) {
492  bool HasSSE1 = Subtarget->hasSSE1();
493  bool HasSSE2 = Subtarget->hasSSE2();
494  bool HasSSE4A = Subtarget->hasSSE4A();
495  bool HasAVX = Subtarget->hasAVX();
496  bool HasAVX512 = Subtarget->hasAVX512();
497  bool HasVLX = Subtarget->hasVLX();
498  bool IsNonTemporal = MMO && MMO->isNonTemporal();
499 
500  // Get opcode and regclass of the output for the given store instruction.
501  unsigned Opc = 0;
502  switch (VT.getSimpleVT().SimpleTy) {
503  case MVT::f80: // No f80 support yet.
504  default: return false;
505  case MVT::i1: {
506  // Mask out all but lowest bit.
507  Register AndResult = createResultReg(&X86::GR8RegClass);
508  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
509  TII.get(X86::AND8ri), AndResult)
510  .addReg(ValReg).addImm(1);
511  ValReg = AndResult;
512  LLVM_FALLTHROUGH; // handle i1 as i8.
513  }
514  case MVT::i8: Opc = X86::MOV8mr; break;
515  case MVT::i16: Opc = X86::MOV16mr; break;
516  case MVT::i32:
517  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
518  break;
519  case MVT::i64:
520  // Must be in x86-64 mode.
521  Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
522  break;
523  case MVT::f32:
524  if (X86ScalarSSEf32) {
525  if (IsNonTemporal && HasSSE4A)
526  Opc = X86::MOVNTSS;
527  else
528  Opc = HasAVX512 ? X86::VMOVSSZmr :
529  HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
530  } else
531  Opc = X86::ST_Fp32m;
532  break;
533  case MVT::f64:
534  if (X86ScalarSSEf64) {
535  if (IsNonTemporal && HasSSE4A)
536  Opc = X86::MOVNTSD;
537  else
538  Opc = HasAVX512 ? X86::VMOVSDZmr :
539  HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
540  } else
541  Opc = X86::ST_Fp64m;
542  break;
543  case MVT::x86mmx:
544  Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
545  break;
546  case MVT::v4f32:
547  if (Aligned) {
548  if (IsNonTemporal)
549  Opc = HasVLX ? X86::VMOVNTPSZ128mr :
550  HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
551  else
552  Opc = HasVLX ? X86::VMOVAPSZ128mr :
553  HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
554  } else
555  Opc = HasVLX ? X86::VMOVUPSZ128mr :
556  HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
557  break;
558  case MVT::v2f64:
559  if (Aligned) {
560  if (IsNonTemporal)
561  Opc = HasVLX ? X86::VMOVNTPDZ128mr :
562  HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
563  else
564  Opc = HasVLX ? X86::VMOVAPDZ128mr :
565  HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
566  } else
567  Opc = HasVLX ? X86::VMOVUPDZ128mr :
568  HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
569  break;
570  case MVT::v4i32:
571  case MVT::v2i64:
572  case MVT::v8i16:
573  case MVT::v16i8:
574  if (Aligned) {
575  if (IsNonTemporal)
576  Opc = HasVLX ? X86::VMOVNTDQZ128mr :
577  HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
578  else
579  Opc = HasVLX ? X86::VMOVDQA64Z128mr :
580  HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
581  } else
582  Opc = HasVLX ? X86::VMOVDQU64Z128mr :
583  HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
584  break;
585  case MVT::v8f32:
586  assert(HasAVX);
587  if (Aligned) {
588  if (IsNonTemporal)
589  Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
590  else
591  Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
592  } else
593  Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
594  break;
595  case MVT::v4f64:
596  assert(HasAVX);
597  if (Aligned) {
598  if (IsNonTemporal)
599  Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
600  else
601  Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
602  } else
603  Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
604  break;
605  case MVT::v8i32:
606  case MVT::v4i64:
607  case MVT::v16i16:
608  case MVT::v32i8:
609  assert(HasAVX);
610  if (Aligned) {
611  if (IsNonTemporal)
612  Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
613  else
614  Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
615  } else
616  Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
617  break;
618  case MVT::v16f32:
619  assert(HasAVX512);
620  if (Aligned)
621  Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
622  else
623  Opc = X86::VMOVUPSZmr;
624  break;
625  case MVT::v8f64:
626  assert(HasAVX512);
627  if (Aligned) {
628  Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
629  } else
630  Opc = X86::VMOVUPDZmr;
631  break;
632  case MVT::v8i64:
633  case MVT::v16i32:
634  case MVT::v32i16:
635  case MVT::v64i8:
636  assert(HasAVX512);
637  // Note: There are a lot more choices based on type with AVX-512, but
638  // there's really no advantage when the store isn't masked.
639  if (Aligned)
640  Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
641  else
642  Opc = X86::VMOVDQU64Zmr;
643  break;
644  }
645 
646  const MCInstrDesc &Desc = TII.get(Opc);
647  // Some of the instructions in the previous switch use FR128 instead
648  // of FR32 for ValReg. Make sure the register we feed the instruction
649  // matches its register class constraints.
650  // Note: It is fine to copy from FR32 to FR128; these are the same
651  // registers behind the scenes, which is why this never triggered
652  // any bugs before.
653  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
654  MachineInstrBuilder MIB =
655  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
656  addFullAddress(MIB, AM).addReg(ValReg);
657  if (MMO)
658  MIB->addMemOperand(*FuncInfo.MF, MMO);
659 
660  return true;
661 }
662 
663 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
664  X86AddressMode &AM,
665  MachineMemOperand *MMO, bool Aligned) {
666  // Handle 'null' like i32/i64 0.
667  if (isa<ConstantPointerNull>(Val))
668  Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
669 
670  // If this is a store of a simple constant, fold the constant into the store.
671  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
672  unsigned Opc = 0;
673  bool Signed = true;
674  switch (VT.getSimpleVT().SimpleTy) {
675  default: break;
676  case MVT::i1:
677  Signed = false;
678  LLVM_FALLTHROUGH; // Handle as i8.
679  case MVT::i8: Opc = X86::MOV8mi; break;
680  case MVT::i16: Opc = X86::MOV16mi; break;
681  case MVT::i32: Opc = X86::MOV32mi; break;
682  case MVT::i64:
683  // Must be a 32-bit sign extended value.
684  if (isInt<32>(CI->getSExtValue()))
685  Opc = X86::MOV64mi32;
686  break;
687  }
688 
689  if (Opc) {
690  MachineInstrBuilder MIB =
691  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
692  addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
693  : CI->getZExtValue());
694  if (MMO)
695  MIB->addMemOperand(*FuncInfo.MF, MMO);
696  return true;
697  }
698  }
699 
700  Register ValReg = getRegForValue(Val);
701  if (ValReg == 0)
702  return false;
703 
704  return X86FastEmitStore(VT, ValReg, AM, MMO, Aligned);
705 }
706 
707 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
708 /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
709 /// ISD::SIGN_EXTEND).
710 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
711  unsigned Src, EVT SrcVT,
712  unsigned &ResultReg) {
713  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
714  if (RR == 0)
715  return false;
716 
717  ResultReg = RR;
718  return true;
719 }
720 
721 bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
722  // Handle constant address.
723  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
724  // Can't handle alternate code models yet.
725  if (TM.getCodeModel() != CodeModel::Small)
726  return false;
727 
728  // Can't handle TLS yet.
729  if (GV->isThreadLocal())
730  return false;
731 
732  // Can't handle !absolute_symbol references yet.
733  if (GV->isAbsoluteSymbolRef())
734  return false;
735 
736  // RIP-relative addresses can't have additional register operands, so if
737  // we've already folded stuff into the addressing mode, just force the
738  // global value into its own register, which we can use as the basereg.
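  // (RIP-relative addressing can only encode [rip + disp32]; it has no way to
  // carry an extra base or index register, hence the check below.)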
739  if (!Subtarget->isPICStyleRIPRel() ||
740  (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
741  // Okay, we've committed to selecting this global. Set up the address.
742  AM.GV = GV;
743 
744  // Allow the subtarget to classify the global.
745  unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
746 
747  // If this reference is relative to the pic base, set it now.
748  if (isGlobalRelativeToPICBase(GVFlags)) {
749  // FIXME: How do we know Base.Reg is free??
750  AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
751  }
752 
753  // Unless the ABI requires an extra load, return a direct reference to
754  // the global.
755  if (!isGlobalStubReference(GVFlags)) {
756  if (Subtarget->isPICStyleRIPRel()) {
757  // Use rip-relative addressing if we can. Above we verified that the
758  // base and index registers are unused.
759  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
760  AM.Base.Reg = X86::RIP;
761  }
762  AM.GVOpFlags = GVFlags;
763  return true;
764  }
765 
766  // Ok, we need to do a load from a stub. If we've already loaded from
767  // this stub, reuse the loaded pointer, otherwise emit the load now.
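  // (On x86-64 PIC, for instance, this stub load is typically something like
  //   movq symbol@GOTPCREL(%rip), %reg
  // and the loaded pointer then becomes the base register of the address.)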
768  DenseMap<const Value *, Register>::iterator I = LocalValueMap.find(V);
769  Register LoadReg;
770  if (I != LocalValueMap.end() && I->second) {
771  LoadReg = I->second;
772  } else {
773  // Issue load from stub.
774  unsigned Opc = 0;
775  const TargetRegisterClass *RC = nullptr;
776  X86AddressMode StubAM;
777  StubAM.Base.Reg = AM.Base.Reg;
778  StubAM.GV = GV;
779  StubAM.GVOpFlags = GVFlags;
780 
781  // Prepare for inserting code in the local-value area.
782  SavePoint SaveInsertPt = enterLocalValueArea();
783 
784  if (TLI.getPointerTy(DL) == MVT::i64) {
785  Opc = X86::MOV64rm;
786  RC = &X86::GR64RegClass;
787  } else {
788  Opc = X86::MOV32rm;
789  RC = &X86::GR32RegClass;
790  }
791 
792  if (Subtarget->isPICStyleRIPRel() || GVFlags == X86II::MO_GOTPCREL)
793  StubAM.Base.Reg = X86::RIP;
794 
795  LoadReg = createResultReg(RC);
796  MachineInstrBuilder LoadMI =
797  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
798  addFullAddress(LoadMI, StubAM);
799 
800  // Ok, back to normal mode.
801  leaveLocalValueArea(SaveInsertPt);
802 
803  // Prevent loading GV stub multiple times in same MBB.
804  LocalValueMap[V] = LoadReg;
805  }
806 
807  // Now construct the final address. Note that the Disp, Scale,
808  // and Index values may already be set here.
809  AM.Base.Reg = LoadReg;
810  AM.GV = nullptr;
811  return true;
812  }
813  }
814 
815  // If all else fails, try to materialize the value in a register.
816  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
817  if (AM.Base.Reg == 0) {
818  AM.Base.Reg = getRegForValue(V);
819  return AM.Base.Reg != 0;
820  }
821  if (AM.IndexReg == 0) {
822  assert(AM.Scale == 1 && "Scale with no index!");
823  AM.IndexReg = getRegForValue(V);
824  return AM.IndexReg != 0;
825  }
826  }
827 
828  return false;
829 }
830 
831 /// X86SelectAddress - Attempt to fill in an address from the given value.
832 ///
833 bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
834  SmallVector<const Value *, 32> GEPs;
835 redo_gep:
836  const User *U = nullptr;
837  unsigned Opcode = Instruction::UserOp1;
838  if (const Instruction *I = dyn_cast<Instruction>(V)) {
839  // Don't walk into other basic blocks; it's possible we haven't
840  // visited them yet, so the instructions may not yet be assigned
841  // virtual registers.
842  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
843  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
844  Opcode = I->getOpcode();
845  U = I;
846  }
847  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
848  Opcode = C->getOpcode();
849  U = C;
850  }
851 
852  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
853  if (Ty->getAddressSpace() > 255)
854  // Fast instruction selection doesn't support the special
855  // address spaces.
856  return false;
857 
858  switch (Opcode) {
859  default: break;
860  case Instruction::BitCast:
861  // Look past bitcasts.
862  return X86SelectAddress(U->getOperand(0), AM);
863 
864  case Instruction::IntToPtr:
865  // Look past no-op inttoptrs.
866  if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
867  TLI.getPointerTy(DL))
868  return X86SelectAddress(U->getOperand(0), AM);
869  break;
870 
871  case Instruction::PtrToInt:
872  // Look past no-op ptrtoints.
873  if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
874  return X86SelectAddress(U->getOperand(0), AM);
875  break;
876 
877  case Instruction::Alloca: {
878  // Do static allocas.
879  const AllocaInst *A = cast<AllocaInst>(V);
880  DenseMap<const AllocaInst *, int>::iterator SI =
881  FuncInfo.StaticAllocaMap.find(A);
882  if (SI != FuncInfo.StaticAllocaMap.end()) {
883  AM.BaseType = X86AddressMode::FrameIndexBase;
884  AM.Base.FrameIndex = SI->second;
885  return true;
886  }
887  break;
888  }
889 
890  case Instruction::Add: {
891  // Adds of constants are common and easy enough.
892  if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
893  uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
894  // They have to fit in the 32-bit signed displacement field though.
895  if (isInt<32>(Disp)) {
896  AM.Disp = (uint32_t)Disp;
897  return X86SelectAddress(U->getOperand(0), AM);
898  }
899  }
900  break;
901  }
902 
903  case Instruction::GetElementPtr: {
904  X86AddressMode SavedAM = AM;
905 
906  // Pattern-match simple GEPs.
907  uint64_t Disp = (int32_t)AM.Disp;
908  unsigned IndexReg = AM.IndexReg;
909  unsigned Scale = AM.Scale;
910  gep_type_iterator GTI = gep_type_begin(U);
911  // Iterate through the indices, folding what we can. Constants can be
912  // folded, and one dynamic index can be handled, if the scale is supported.
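  // (For example, a GEP like "getelementptr i32, i32* %p, i64 %i" can fold to
  // the addressing mode [%p + 4*%i], while purely constant indices are folded
  // straight into the 32-bit displacement.)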
913  for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
914  i != e; ++i, ++GTI) {
915  const Value *Op = *i;
916  if (StructType *STy = GTI.getStructTypeOrNull()) {
917  const StructLayout *SL = DL.getStructLayout(STy);
918  Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
919  continue;
920  }
921 
922  // An array/variable index is always of the form i*S where S is the
923  // constant scale size. See if we can push the scale into immediates.
924  uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
925  for (;;) {
926  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
927  // Constant-offset addressing.
928  Disp += CI->getSExtValue() * S;
929  break;
930  }
931  if (canFoldAddIntoGEP(U, Op)) {
932  // A compatible add with a constant operand. Fold the constant.
933  ConstantInt *CI =
934  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
935  Disp += CI->getSExtValue() * S;
936  // Iterate on the other operand.
937  Op = cast<AddOperator>(Op)->getOperand(0);
938  continue;
939  }
940  if (IndexReg == 0 &&
941  (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
942  (S == 1 || S == 2 || S == 4 || S == 8)) {
943  // Scaled-index addressing.
944  Scale = S;
945  IndexReg = getRegForGEPIndex(Op);
946  if (IndexReg == 0)
947  return false;
948  break;
949  }
950  // Unsupported.
951  goto unsupported_gep;
952  }
953  }
954 
955  // Check for displacement overflow.
956  if (!isInt<32>(Disp))
957  break;
958 
959  AM.IndexReg = IndexReg;
960  AM.Scale = Scale;
961  AM.Disp = (uint32_t)Disp;
962  GEPs.push_back(V);
963 
964  if (const GetElementPtrInst *GEP =
965  dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
966  // Ok, the GEP indices were covered by constant-offset and scaled-index
967  // addressing. Update the address state and move on to examining the base.
968  V = GEP;
969  goto redo_gep;
970  } else if (X86SelectAddress(U->getOperand(0), AM)) {
971  return true;
972  }
973 
974  // If we couldn't merge the gep value into this addr mode, revert back to
975  // our address and just match the value instead of completely failing.
976  AM = SavedAM;
977 
978  for (const Value *I : reverse(GEPs))
979  if (handleConstantAddresses(I, AM))
980  return true;
981 
982  return false;
983  unsupported_gep:
984  // Ok, the GEP indices weren't all covered.
985  break;
986  }
987  }
988 
989  return handleConstantAddresses(V, AM);
990 }
991 
992 /// X86SelectCallAddress - Attempt to fill in an address from the given value.
993 ///
994 bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
995  const User *U = nullptr;
996  unsigned Opcode = Instruction::UserOp1;
997  const Instruction *I = dyn_cast<Instruction>(V);
998  // Record if the value is defined in the same basic block.
999  //
1000  // This information is crucial to know whether or not folding an
1001  // operand is valid.
1002  // Indeed, FastISel generates or reuses a virtual register for all
1003  // operands of all instructions it selects. Obviously, the definition and
1004  // its uses must use the same virtual register otherwise the produced
1005  // code is incorrect.
1006  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
1007  // registers for values that are alive across basic blocks. This ensures
1008  // that the values are consistently set across basic blocks, even
1009  // if different instruction selection mechanisms are used (e.g., a mix of
1010  // SDISel and FastISel).
1011  // For values local to a basic block, the instruction selection process
1012  // generates these virtual registers with whatever method is appropriate
1013  // for its needs. In particular, FastISel and SDISel do not share the way
1014  // local virtual registers are set.
1015  // Therefore, it is impossible (or at least unsafe) to share values
1016  // between basic blocks unless they use the same instruction selection
1017  // method, which is not guaranteed for X86.
1018  // Moreover, things like hasOneUse cannot be used accurately if we
1019  // allow references to values across basic blocks when they are not
1020  // alive across basic blocks initially.
1021  bool InMBB = true;
1022  if (I) {
1023  Opcode = I->getOpcode();
1024  U = I;
1025  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
1026  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
1027  Opcode = C->getOpcode();
1028  U = C;
1029  }
1030 
1031  switch (Opcode) {
1032  default: break;
1033  case Instruction::BitCast:
1034  // Look past bitcasts if its operand is in the same BB.
1035  if (InMBB)
1036  return X86SelectCallAddress(U->getOperand(0), AM);
1037  break;
1038 
1039  case Instruction::IntToPtr:
1040  // Look past no-op inttoptrs if its operand is in the same BB.
1041  if (InMBB &&
1042  TLI.getValueType(DL, U->getOperand(0)->getType()) ==
1043  TLI.getPointerTy(DL))
1044  return X86SelectCallAddress(U->getOperand(0), AM);
1045  break;
1046 
1047  case Instruction::PtrToInt:
1048  // Look past no-op ptrtoints if its operand is in the same BB.
1049  if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
1050  return X86SelectCallAddress(U->getOperand(0), AM);
1051  break;
1052  }
1053 
1054  // Handle constant address.
1055  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
1056  // Can't handle alternate code models yet.
1057  if (TM.getCodeModel() != CodeModel::Small)
1058  return false;
1059 
1060  // RIP-relative addresses can't have additional register operands.
1061  if (Subtarget->isPICStyleRIPRel() &&
1062  (AM.Base.Reg != 0 || AM.IndexReg != 0))
1063  return false;
1064 
1065  // Can't handle TLS.
1066  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
1067  if (GVar->isThreadLocal())
1068  return false;
1069 
1070  // Okay, we've committed to selecting this global. Set up the basic address.
1071  AM.GV = GV;
1072 
1073  // Return a direct reference to the global. Fastisel can handle calls to
1074  // functions that require loads, such as dllimport and nonlazybind
1075  // functions.
1076  if (Subtarget->isPICStyleRIPRel()) {
1077  // Use rip-relative addressing if we can. Above we verified that the
1078  // base and index registers are unused.
1079  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
1080  AM.Base.Reg = X86::RIP;
1081  } else {
1082  AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
1083  }
1084 
1085  return true;
1086  }
1087 
1088  // If all else fails, try to materialize the value in a register.
1089  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
1090  auto GetCallRegForValue = [this](const Value *V) {
1091  Register Reg = getRegForValue(V);
1092 
1093  // In 64-bit mode, we need a 64-bit register even if pointers are 32 bits.
1094  if (Reg && Subtarget->isTarget64BitILP32()) {
1095  Register CopyReg = createResultReg(&X86::GR32RegClass);
1096  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32rr),
1097  CopyReg)
1098  .addReg(Reg);
1099 
1100  Register ExtReg = createResultReg(&X86::GR64RegClass);
1101  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1102  TII.get(TargetOpcode::SUBREG_TO_REG), ExtReg)
1103  .addImm(0)
1104  .addReg(CopyReg)
1105  .addImm(X86::sub_32bit);
1106  Reg = ExtReg;
1107  }
1108 
1109  return Reg;
1110  };
1111 
1112  if (AM.Base.Reg == 0) {
1113  AM.Base.Reg = GetCallRegForValue(V);
1114  return AM.Base.Reg != 0;
1115  }
1116  if (AM.IndexReg == 0) {
1117  assert(AM.Scale == 1 && "Scale with no index!");
1118  AM.IndexReg = GetCallRegForValue(V);
1119  return AM.IndexReg != 0;
1120  }
1121  }
1122 
1123  return false;
1124 }
1125 
1126 
1127 /// X86SelectStore - Select and emit code to implement store instructions.
1128 bool X86FastISel::X86SelectStore(const Instruction *I) {
1129  // Atomic stores need special handling.
1130  const StoreInst *S = cast<StoreInst>(I);
1131 
1132  if (S->isAtomic())
1133  return false;
1134 
1135  const Value *PtrV = I->getOperand(1);
1136  if (TLI.supportSwiftError()) {
1137  // Swifterror values can come from either a function parameter with
1138  // swifterror attribute or an alloca with swifterror attribute.
1139  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
1140  if (Arg->hasSwiftErrorAttr())
1141  return false;
1142  }
1143 
1144  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
1145  if (Alloca->isSwiftError())
1146  return false;
1147  }
1148  }
1149 
1150  const Value *Val = S->getValueOperand();
1151  const Value *Ptr = S->getPointerOperand();
1152 
1153  MVT VT;
1154  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
1155  return false;
1156 
1157  Align Alignment = S->getAlign();
1158  Align ABIAlignment = DL.getABITypeAlign(Val->getType());
1159  bool Aligned = Alignment >= ABIAlignment;
1160 
1161  X86AddressMode AM;
1162  if (!X86SelectAddress(Ptr, AM))
1163  return false;
1164 
1165  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
1166 }
1167 
1168 /// X86SelectRet - Select and emit code to implement ret instructions.
1169 bool X86FastISel::X86SelectRet(const Instruction *I) {
1170  const ReturnInst *Ret = cast<ReturnInst>(I);
1171  const Function &F = *I->getParent()->getParent();
1172  const X86MachineFunctionInfo *X86MFInfo =
1173  FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
1174 
1175  if (!FuncInfo.CanLowerReturn)
1176  return false;
1177 
1178  if (TLI.supportSwiftError() &&
1179  F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
1180  return false;
1181 
1182  if (TLI.supportSplitCSR(FuncInfo.MF))
1183  return false;
1184 
1185  CallingConv::ID CC = F.getCallingConv();
1186  if (CC != CallingConv::C &&
1187  CC != CallingConv::Fast &&
1188  CC != CallingConv::Tail &&
1189  CC != CallingConv::SwiftTail &&
1190  CC != CallingConv::X86_FastCall &&
1191  CC != CallingConv::X86_StdCall &&
1192  CC != CallingConv::X86_ThisCall &&
1193  CC != CallingConv::X86_64_SysV &&
1194  CC != CallingConv::Win64)
1195  return false;
1196 
1197  // Don't handle popping bytes if they don't fit the ret's immediate.
1198  if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
1199  return false;
1200 
1201  // fastcc with -tailcallopt is intended to provide a guaranteed
1202  // tail call optimization. Fastisel doesn't know how to do that.
1203  if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
1204  CC == CallingConv::Tail || CC == CallingConv::SwiftTail)
1205  return false;
1206 
1207  // Let SDISel handle vararg functions.
1208  if (F.isVarArg())
1209  return false;
1210 
1211  // Build a list of return value registers.
1212  SmallVector<unsigned, 4> RetRegs;
1213 
1214  if (Ret->getNumOperands() > 0) {
1215  SmallVector<ISD::OutputArg, 4> Outs;
1216  GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1217 
1218  // Analyze operands of the call, assigning locations to each operand.
1219  SmallVector<CCValAssign, 16> ValLocs;
1220  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
1221  CCInfo.AnalyzeReturn(Outs, RetCC_X86);
1222 
1223  const Value *RV = Ret->getOperand(0);
1224  Register Reg = getRegForValue(RV);
1225  if (Reg == 0)
1226  return false;
1227 
1228  // Only handle a single return value for now.
1229  if (ValLocs.size() != 1)
1230  return false;
1231 
1232  CCValAssign &VA = ValLocs[0];
1233 
1234  // Don't bother handling odd stuff for now.
1235  if (VA.getLocInfo() != CCValAssign::Full)
1236  return false;
1237  // Only handle register returns for now.
1238  if (!VA.isRegLoc())
1239  return false;
1240 
1241  // The calling-convention tables for x87 returns don't tell
1242  // the whole story.
1243  if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
1244  return false;
1245 
1246  unsigned SrcReg = Reg + VA.getValNo();
1247  EVT SrcVT = TLI.getValueType(DL, RV->getType());
1248  EVT DstVT = VA.getValVT();
1249  // Special handling for extended integers.
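  // (For instance, an i8 or i16 return value marked zeroext/signext has to be
  // widened to i32 in the return register, per the X86 calling-convention
  // tables; that is what the fastEmit_r extension below does.)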
1250  if (SrcVT != DstVT) {
1251  if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
1252  return false;
1253 
1254  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
1255  return false;
1256 
1257  assert(DstVT == MVT::i32 && "X86 should always ext to i32");
1258 
1259  if (SrcVT == MVT::i1) {
1260  if (Outs[0].Flags.isSExt())
1261  return false;
1262  // TODO
1263  SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg);
1264  SrcVT = MVT::i8;
1265  }
1266  unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
1267  ISD::SIGN_EXTEND;
1268  // TODO
1269  SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op, SrcReg);
1270  }
1271 
1272  // Make the copy.
1273  Register DstReg = VA.getLocReg();
1274  const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
1275  // Avoid a cross-class copy. This is very unlikely.
1276  if (!SrcRC->contains(DstReg))
1277  return false;
1278  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1279  TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
1280 
1281  // Add register to return instruction.
1282  RetRegs.push_back(VA.getLocReg());
1283  }
1284 
1285  // Swift calling convention does not require we copy the sret argument
1286  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
1287 
1288  // All x86 ABIs require that for returning structs by value we copy
1289  // the sret argument into %rax/%eax (depending on ABI) for the return.
1290  // We saved the argument into a virtual register in the entry block,
1291  // so now we copy the value out and into %rax/%eax.
1292  if (F.hasStructRetAttr() && CC != CallingConv::Swift &&
1293  CC != CallingConv::SwiftTail) {
1294  Register Reg = X86MFInfo->getSRetReturnReg();
1295  assert(Reg &&
1296  "SRetReturnReg should have been set in LowerFormalArguments()!");
1297  unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
1298  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1299  TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
1300  RetRegs.push_back(RetReg);
1301  }
1302 
1303  // Now emit the RET.
1304  MachineInstrBuilder MIB;
1305  if (X86MFInfo->getBytesToPopOnReturn()) {
1306  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1307  TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL))
1308  .addImm(X86MFInfo->getBytesToPopOnReturn());
1309  } else {
1310  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1311  TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
1312  }
1313  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1314  MIB.addReg(RetRegs[i], RegState::Implicit);
1315  return true;
1316 }
1317 
1318 /// X86SelectLoad - Select and emit code to implement load instructions.
1319 ///
1320 bool X86FastISel::X86SelectLoad(const Instruction *I) {
1321  const LoadInst *LI = cast<LoadInst>(I);
1322 
1323  // Atomic loads need special handling.
1324  if (LI->isAtomic())
1325  return false;
1326 
1327  const Value *SV = I->getOperand(0);
1328  if (TLI.supportSwiftError()) {
1329  // Swifterror values can come from either a function parameter with
1330  // swifterror attribute or an alloca with swifterror attribute.
1331  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1332  if (Arg->hasSwiftErrorAttr())
1333  return false;
1334  }
1335 
1336  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1337  if (Alloca->isSwiftError())
1338  return false;
1339  }
1340  }
1341 
1342  MVT VT;
1343  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
1344  return false;
1345 
1346  const Value *Ptr = LI->getPointerOperand();
1347 
1348  X86AddressMode AM;
1349  if (!X86SelectAddress(Ptr, AM))
1350  return false;
1351 
1352  unsigned ResultReg = 0;
1353  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
1354  LI->getAlign().value()))
1355  return false;
1356 
1357  updateValueMap(I, ResultReg);
1358  return true;
1359 }
1360 
1361 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
1362  bool HasAVX512 = Subtarget->hasAVX512();
1363  bool HasAVX = Subtarget->hasAVX();
1364  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
1365  bool X86ScalarSSEf64 = Subtarget->hasSSE2();
1366 
1367  switch (VT.getSimpleVT().SimpleTy) {
1368  default: return 0;
1369  case MVT::i8: return X86::CMP8rr;
1370  case MVT::i16: return X86::CMP16rr;
1371  case MVT::i32: return X86::CMP32rr;
1372  case MVT::i64: return X86::CMP64rr;
1373  case MVT::f32:
1374  return X86ScalarSSEf32
1375  ? (HasAVX512 ? X86::VUCOMISSZrr
1376  : HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr)
1377  : 0;
1378  case MVT::f64:
1379  return X86ScalarSSEf64
1380  ? (HasAVX512 ? X86::VUCOMISDZrr
1381  : HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr)
1382  : 0;
1383  }
1384 }
1385 
1386 /// If we have a comparison whose RHS is the constant RHSC, return an opcode
1387 /// that can fold the immediate into the compare (e.g. CMP32ri); otherwise return 0.
1388 static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
1389  int64_t Val = RHSC->getSExtValue();
1390  switch (VT.getSimpleVT().SimpleTy) {
1391  // Otherwise, we can't fold the immediate into this comparison.
1392  default:
1393  return 0;
1394  case MVT::i8:
1395  return X86::CMP8ri;
1396  case MVT::i16:
1397  if (isInt<8>(Val))
1398  return X86::CMP16ri8;
1399  return X86::CMP16ri;
1400  case MVT::i32:
1401  if (isInt<8>(Val))
1402  return X86::CMP32ri8;
1403  return X86::CMP32ri;
1404  case MVT::i64:
1405  if (isInt<8>(Val))
1406  return X86::CMP64ri8;
1407  // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
1408  // field.
1409  if (isInt<32>(Val))
1410  return X86::CMP64ri32;
1411  return 0;
1412  }
1413 }
1414 
1415 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
1416  const DebugLoc &CurDbgLoc) {
1417  Register Op0Reg = getRegForValue(Op0);
1418  if (Op0Reg == 0) return false;
1419 
1420  // Handle 'null' like i32/i64 0.
1421  if (isa<ConstantPointerNull>(Op1))
1422  Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
1423 
1424  // We have two options: compare with register or immediate. If the RHS of
1425  // the compare is an immediate that we can fold into this compare, use
1426  // CMPri, otherwise use CMPrr.
1427  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1428  if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
1429  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
1430  .addReg(Op0Reg)
1431  .addImm(Op1C->getSExtValue());
1432  return true;
1433  }
1434  }
1435 
1436  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
1437  if (CompareOpc == 0) return false;
1438 
1439  Register Op1Reg = getRegForValue(Op1);
1440  if (Op1Reg == 0) return false;
1441  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
1442  .addReg(Op0Reg)
1443  .addReg(Op1Reg);
1444 
1445  return true;
1446 }
1447 
1448 bool X86FastISel::X86SelectCmp(const Instruction *I) {
1449  const CmpInst *CI = cast<CmpInst>(I);
1450 
1451  MVT VT;
1452  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
1453  return false;
1454 
1455  // Below code only works for scalars.
1456  if (VT.isVector())
1457  return false;
1458 
1459  // Try to optimize or fold the cmp.
1460  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1461  unsigned ResultReg = 0;
1462  switch (Predicate) {
1463  default: break;
1464  case CmpInst::FCMP_FALSE: {
1465  ResultReg = createResultReg(&X86::GR32RegClass);
1466  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
1467  ResultReg);
1468  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, X86::sub_8bit);
1469  if (!ResultReg)
1470  return false;
1471  break;
1472  }
1473  case CmpInst::FCMP_TRUE: {
1474  ResultReg = createResultReg(&X86::GR8RegClass);
1475  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
1476  ResultReg).addImm(1);
1477  break;
1478  }
1479  }
1480 
1481  if (ResultReg) {
1482  updateValueMap(I, ResultReg);
1483  return true;
1484  }
1485 
1486  const Value *LHS = CI->getOperand(0);
1487  const Value *RHS = CI->getOperand(1);
1488 
1489  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1490  // We don't have to materialize a zero constant for this case and can just use
1491  // %x again on the RHS.
1492  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1493  const auto *RHSC = dyn_cast<ConstantFP>(RHS);
1494  if (RHSC && RHSC->isNullValue())
1495  RHS = LHS;
1496  }
1497 
1498  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
1499  static const uint16_t SETFOpcTable[2][3] = {
1500  { X86::COND_E, X86::COND_NP, X86::AND8rr },
1501  { X86::COND_NE, X86::COND_P, X86::OR8rr }
1502  };
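  // (For fcmp oeq this produces, roughly: a ucomiss/ucomisd compare, then SETE
  // and SETNP into two temporary GR8 registers combined with AND8rr; fcmp une
  // uses SETNE/SETP combined with OR8rr.)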
1503  const uint16_t *SETFOpc = nullptr;
1504  switch (Predicate) {
1505  default: break;
1506  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
1507  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
1508  }
1509 
1510  ResultReg = createResultReg(&X86::GR8RegClass);
1511  if (SETFOpc) {
1512  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1513  return false;
1514 
1515  Register FlagReg1 = createResultReg(&X86::GR8RegClass);
1516  Register FlagReg2 = createResultReg(&X86::GR8RegClass);
1517  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1518  FlagReg1).addImm(SETFOpc[0]);
1519  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1520  FlagReg2).addImm(SETFOpc[1]);
1521  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
1522  ResultReg).addReg(FlagReg1).addReg(FlagReg2);
1523  updateValueMap(I, ResultReg);
1524  return true;
1525  }
1526 
1527  X86::CondCode CC;
1528  bool SwapArgs;
1529  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1530  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1531 
1532  if (SwapArgs)
1533  std::swap(LHS, RHS);
1534 
1535  // Emit a compare of LHS/RHS.
1536  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1537  return false;
1538 
1539  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
1540  ResultReg).addImm(CC);
1541  updateValueMap(I, ResultReg);
1542  return true;
1543 }
1544 
1545 bool X86FastISel::X86SelectZExt(const Instruction *I) {
1546  EVT DstVT = TLI.getValueType(DL, I->getType());
1547  if (!TLI.isTypeLegal(DstVT))
1548  return false;
1549 
1550  Register ResultReg = getRegForValue(I->getOperand(0));
1551  if (ResultReg == 0)
1552  return false;
1553 
1554  // Handle zero-extension from i1 to i8, which is common.
1555  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1556  if (SrcVT == MVT::i1) {
1557  // Set the high bits to zero.
1558  ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg);
1559  SrcVT = MVT::i8;
1560 
1561  if (ResultReg == 0)
1562  return false;
1563  }
1564 
1565  if (DstVT == MVT::i64) {
1566  // Handle extension to 64-bits via sub-register shenanigans.
1567  unsigned MovInst;
1568 
1569  switch (SrcVT.SimpleTy) {
1570  case MVT::i8: MovInst = X86::MOVZX32rr8; break;
1571  case MVT::i16: MovInst = X86::MOVZX32rr16; break;
1572  case MVT::i32: MovInst = X86::MOV32rr; break;
1573  default: llvm_unreachable("Unexpected zext to i64 source type");
1574  }
1575 
1576  Register Result32 = createResultReg(&X86::GR32RegClass);
1577  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
1578  .addReg(ResultReg);
1579 
1580  ResultReg = createResultReg(&X86::GR64RegClass);
1581  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
1582  ResultReg)
1583  .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
1584  } else if (DstVT == MVT::i16) {
1585  // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
1586  // extend to 32-bits and then extract down to 16-bits.
1587  Register Result32 = createResultReg(&X86::GR32RegClass);
1588  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
1589  Result32).addReg(ResultReg);
1590 
1591  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit);
1592  } else if (DstVT != MVT::i8) {
1593  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1594  ResultReg);
1595  if (ResultReg == 0)
1596  return false;
1597  }
1598 
1599  updateValueMap(I, ResultReg);
1600  return true;
1601 }
1602 
1603 bool X86FastISel::X86SelectSExt(const Instruction *I) {
1604  EVT DstVT = TLI.getValueType(DL, I->getType());
1605  if (!TLI.isTypeLegal(DstVT))
1606  return false;
1607 
1608  Register ResultReg = getRegForValue(I->getOperand(0));
1609  if (ResultReg == 0)
1610  return false;
1611 
1612  // Handle sign-extension from i1 to i8.
1613  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1614  if (SrcVT == MVT::i1) {
1615  // Set the high bits to zero.
1616  Register ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg);
1617  if (ZExtReg == 0)
1618  return false;
1619 
1620  // Negate the result to make an 8-bit sign extended value.
1621  ResultReg = createResultReg(&X86::GR8RegClass);
1622  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r),
1623  ResultReg).addReg(ZExtReg);
1624 
1625  SrcVT = MVT::i8;
1626  }
1627 
1628  if (DstVT == MVT::i16) {
1629  // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
1630  // extend to 32-bits and then extract down to 16-bits.
1631  Register Result32 = createResultReg(&X86::GR32RegClass);
1632  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
1633  Result32).addReg(ResultReg);
1634 
1635  ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit);
1636  } else if (DstVT != MVT::i8) {
1637  ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
1638  ResultReg);
1639  if (ResultReg == 0)
1640  return false;
1641  }
1642 
1643  updateValueMap(I, ResultReg);
1644  return true;
1645 }
1646 
1647 bool X86FastISel::X86SelectBranch(const Instruction *I) {
1648  // Unconditional branches are selected by tablegen-generated code.
1649  // Handle a conditional branch.
1650  const BranchInst *BI = cast<BranchInst>(I);
1651  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1652  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1653 
1654  // Fold the common case of a conditional branch with a comparison
1655  // in the same block (values defined in other blocks may not have
1656  // initialized registers).
1657  X86::CondCode CC;
1658  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1659  if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
1660  EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1661 
1662  // Try to optimize or fold the cmp.
1663  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1664  switch (Predicate) {
1665  default: break;
1666  case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
1667  case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true;
1668  }
1669 
1670  const Value *CmpLHS = CI->getOperand(0);
1671  const Value *CmpRHS = CI->getOperand(1);
1672 
1673  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
1674  // 0.0.
1675  // We don't have to materialize a zero constant for this case and can just
1676  // use %x again on the RHS.
1677  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1678  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1679  if (CmpRHSC && CmpRHSC->isNullValue())
1680  CmpRHS = CmpLHS;
1681  }
1682 
1683  // Try to take advantage of fallthrough opportunities.
1684  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1685  std::swap(TrueMBB, FalseMBB);
1686  Predicate = CmpInst::getInversePredicate(Predicate);
1687  }
1688 
1689  // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
1690  // code check. Instead two branch instructions are required to check all
1691  // the flags. First we change the predicate to a supported condition code,
1692  // which will be the first branch. Later on we will emit the second
1693  // branch.
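// For example, "br (fcmp oeq float %a, %b), %T, %F" is handled by swapping the
// targets and branching on UNE; roughly:
//   UCOMISS %a, %b
//   JNE %F     ; not equal  -> original false block
//   JP  %F     ; unordered  -> original false block
//   JMP %T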
1694  bool NeedExtraBranch = false;
1695  switch (Predicate) {
1696  default: break;
1697  case CmpInst::FCMP_OEQ:
1698  std::swap(TrueMBB, FalseMBB);
1699  LLVM_FALLTHROUGH;
1700  case CmpInst::FCMP_UNE:
1701  NeedExtraBranch = true;
1702  Predicate = CmpInst::FCMP_UNE;
1703  break;
1704  }
1705 
1706  bool SwapArgs;
1707  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1708  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1709 
1710  if (SwapArgs)
1711  std::swap(CmpLHS, CmpRHS);
1712 
1713  // Emit a compare of the LHS and RHS, setting the flags.
1714  if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
1715  return false;
1716 
1717  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1718  .addMBB(TrueMBB).addImm(CC);
1719 
1720  // X86 requires a second branch to handle UNE (and OEQ, which is mapped
1721  // to UNE above).
1722  if (NeedExtraBranch) {
1723  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1724  .addMBB(TrueMBB).addImm(X86::COND_P);
1725  }
1726 
1727  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1728  return true;
1729  }
1730  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1731  // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1732  // typically happen for _Bool and C++ bools.
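// For example, "%c = trunc i32 %x to i1; br i1 %c, %T, %F" becomes roughly:
//   TEST32ri %x, 1
//   JNE %T
//   JMP %F
// (the condition is inverted instead when %T is the layout successor).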
1733  MVT SourceVT;
1734  if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1735  isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1736  unsigned TestOpc = 0;
1737  switch (SourceVT.SimpleTy) {
1738  default: break;
1739  case MVT::i8: TestOpc = X86::TEST8ri; break;
1740  case MVT::i16: TestOpc = X86::TEST16ri; break;
1741  case MVT::i32: TestOpc = X86::TEST32ri; break;
1742  case MVT::i64: TestOpc = X86::TEST64ri32; break;
1743  }
1744  if (TestOpc) {
1745  Register OpReg = getRegForValue(TI->getOperand(0));
1746  if (OpReg == 0) return false;
1747 
1748  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
1749  .addReg(OpReg).addImm(1);
1750 
1751  unsigned JmpCond = X86::COND_NE;
1752  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1753  std::swap(TrueMBB, FalseMBB);
1754  JmpCond = X86::COND_E;
1755  }
1756 
1757  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1758  .addMBB(TrueMBB).addImm(JmpCond);
1759 
1760  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1761  return true;
1762  }
1763  }
1764  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
1765  // Fake request the condition, otherwise the intrinsic might be completely
1766  // optimized away.
1767  Register TmpReg = getRegForValue(BI->getCondition());
1768  if (TmpReg == 0)
1769  return false;
1770 
1771  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1772  .addMBB(TrueMBB).addImm(CC);
1773  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1774  return true;
1775  }
1776 
1777  // Otherwise do a clumsy setcc and re-test it.
1778  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1779  // in an explicit cast, so make sure to handle that correctly.
1780  Register OpReg = getRegForValue(BI->getCondition());
1781  if (OpReg == 0) return false;
1782 
1783  // In case OpReg is a K register, COPY to a GPR
1784  if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1785  unsigned KOpReg = OpReg;
1786  OpReg = createResultReg(&X86::GR32RegClass);
1787  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1788  TII.get(TargetOpcode::COPY), OpReg)
1789  .addReg(KOpReg);
1790  OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, X86::sub_8bit);
1791  }
1792  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1793  .addReg(OpReg)
1794  .addImm(1);
1795  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
1796  .addMBB(TrueMBB).addImm(X86::COND_NE);
1797  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1798  return true;
1799 }
1800 
1801 bool X86FastISel::X86SelectShift(const Instruction *I) {
1802  unsigned CReg = 0, OpReg = 0;
1803  const TargetRegisterClass *RC = nullptr;
1804  if (I->getType()->isIntegerTy(8)) {
1805  CReg = X86::CL;
1806  RC = &X86::GR8RegClass;
1807  switch (I->getOpcode()) {
1808  case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1809  case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1810  case Instruction::Shl: OpReg = X86::SHL8rCL; break;
1811  default: return false;
1812  }
1813  } else if (I->getType()->isIntegerTy(16)) {
1814  CReg = X86::CX;
1815  RC = &X86::GR16RegClass;
1816  switch (I->getOpcode()) {
1817  default: llvm_unreachable("Unexpected shift opcode");
1818  case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1819  case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1820  case Instruction::Shl: OpReg = X86::SHL16rCL; break;
1821  }
1822  } else if (I->getType()->isIntegerTy(32)) {
1823  CReg = X86::ECX;
1824  RC = &X86::GR32RegClass;
1825  switch (I->getOpcode()) {
1826  default: llvm_unreachable("Unexpected shift opcode");
1827  case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1828  case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1829  case Instruction::Shl: OpReg = X86::SHL32rCL; break;
1830  }
1831  } else if (I->getType()->isIntegerTy(64)) {
1832  CReg = X86::RCX;
1833  RC = &X86::GR64RegClass;
1834  switch (I->getOpcode()) {
1835  default: llvm_unreachable("Unexpected shift opcode");
1836  case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1837  case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1838  case Instruction::Shl: OpReg = X86::SHL64rCL; break;
1839  }
1840  } else {
1841  return false;
1842  }
1843 
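// For example, "%r = shl i32 %a, %b" is emitted roughly as:
//   COPY ECX, %b       ; the shift amount must end up in CL
//   %r = SHL32rCL %a   ; reads CL implicitly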
1844  MVT VT;
1845  if (!isTypeLegal(I->getType(), VT))
1846  return false;
1847 
1848  Register Op0Reg = getRegForValue(I->getOperand(0));
1849  if (Op0Reg == 0) return false;
1850 
1851  Register Op1Reg = getRegForValue(I->getOperand(1));
1852  if (Op1Reg == 0) return false;
1853  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
1854  CReg).addReg(Op1Reg);
1855 
1856  // The shift instruction uses X86::CL. If we defined a super-register
1857  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
1858  if (CReg != X86::CL)
1859  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1860  TII.get(TargetOpcode::KILL), X86::CL)
1861  .addReg(CReg, RegState::Kill);
1862 
1863  Register ResultReg = createResultReg(RC);
1864  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
1865  .addReg(Op0Reg);
1866  updateValueMap(I, ResultReg);
1867  return true;
1868 }
1869 
1870 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1871  const static unsigned NumTypes = 4; // i8, i16, i32, i64
1872  const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1873  const static bool S = true; // IsSigned
1874  const static bool U = false; // !IsSigned
1875  const static unsigned Copy = TargetOpcode::COPY;
1876  // For the X86 DIV/IDIV instruction, in most cases the dividend
1877  // (numerator) must be in a specific register pair highreg:lowreg,
1878  // producing the quotient in lowreg and the remainder in highreg.
1879  // For most data types, to set up the instruction, the dividend is
1880  // copied into lowreg, and lowreg is sign-extended or zero-extended
1881  // into highreg. The exception is i8, where the dividend is defined
1882  // as a single register rather than a register pair, and we
1883  // therefore directly sign-extend or zero-extend the dividend into
1884  // lowreg, instead of copying, and ignore the highreg.
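// For example, "%q = sdiv i32 %a, %b" is emitted roughly as:
//   COPY EAX, %a
//   CDQ             ; sign-extend EAX into EDX
//   IDIV32r %b      ; quotient in EAX, remainder in EDX
//   %q = COPY EAX
// For i8 the dividend is sign/zero-extended straight into AX (MOVSX16rr8 or
// MOVZX16rr8) and IDIV8r/DIV8r leave the quotient in AL and the remainder in AH.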
1885  const static struct DivRemEntry {
1886  // The following portion depends only on the data type.
1887  const TargetRegisterClass *RC;
1888  unsigned LowInReg; // low part of the register pair
1889  unsigned HighInReg; // high part of the register pair
1890  // The following portion depends on both the data type and the operation.
1891  struct DivRemResult {
1892  unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1893  unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1894  // highreg, or copying a zero into highreg.
1895  unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1896  // zero/sign-extending into lowreg for i8.
1897  unsigned DivRemResultReg; // Register containing the desired result.
1898  bool IsOpSigned; // Whether to use signed or unsigned form.
1899  } ResultTable[NumOps];
1900  } OpTable[NumTypes] = {
1901  { &X86::GR8RegClass, X86::AX, 0, {
1902  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
1903  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
1904  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
1905  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
1906  }
1907  }, // i8
1908  { &X86::GR16RegClass, X86::AX, X86::DX, {
1909  { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
1910  { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
1911  { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
1912  { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
1913  }
1914  }, // i16
1915  { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1916  { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
1917  { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
1918  { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
1919  { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
1920  }
1921  }, // i32
1922  { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1923  { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
1924  { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
1925  { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
1926  { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
1927  }
1928  }, // i64
1929  };
1930 
1931  MVT VT;
1932  if (!isTypeLegal(I->getType(), VT))
1933  return false;
1934 
1935  unsigned TypeIndex, OpIndex;
1936  switch (VT.SimpleTy) {
1937  default: return false;
1938  case MVT::i8: TypeIndex = 0; break;
1939  case MVT::i16: TypeIndex = 1; break;
1940  case MVT::i32: TypeIndex = 2; break;
1941  case MVT::i64: TypeIndex = 3;
1942  if (!Subtarget->is64Bit())
1943  return false;
1944  break;
1945  }
1946 
1947  switch (I->getOpcode()) {
1948  default: llvm_unreachable("Unexpected div/rem opcode");
1949  case Instruction::SDiv: OpIndex = 0; break;
1950  case Instruction::SRem: OpIndex = 1; break;
1951  case Instruction::UDiv: OpIndex = 2; break;
1952  case Instruction::URem: OpIndex = 3; break;
1953  }
1954 
1955  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1956  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1957  Register Op0Reg = getRegForValue(I->getOperand(0));
1958  if (Op0Reg == 0)
1959  return false;
1960  Register Op1Reg = getRegForValue(I->getOperand(1));
1961  if (Op1Reg == 0)
1962  return false;
1963 
1964  // Move op0 into low-order input register.
1965  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1966  TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1967  // Zero-extend or sign-extend into high-order input register.
1968  if (OpEntry.OpSignExtend) {
1969  if (OpEntry.IsOpSigned)
1970  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1971  TII.get(OpEntry.OpSignExtend));
1972  else {
1973  Register Zero32 = createResultReg(&X86::GR32RegClass);
1974  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1975  TII.get(X86::MOV32r0), Zero32);
1976 
1977  // Copy the zero into the appropriate sub/super/identical physical
1978  // register. Unfortunately the operations needed are not uniform enough
1979  // to fit neatly into the table above.
1980  if (VT == MVT::i16) {
1981  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1982  TII.get(Copy), TypeEntry.HighInReg)
1983  .addReg(Zero32, 0, X86::sub_16bit);
1984  } else if (VT == MVT::i32) {
1985  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1986  TII.get(Copy), TypeEntry.HighInReg)
1987  .addReg(Zero32);
1988  } else if (VT == MVT::i64) {
1989  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1990  TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1991  .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1992  }
1993  }
1994  }
1995  // Generate the DIV/IDIV instruction.
1996  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1997  TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1998  // For i8 remainder, we can't reference ah directly, as we'll end
1999  // up with bogus copies like %r9b = COPY %ah. Reference ax
2000  // instead to prevent ah references in a REX-prefixed instruction.
2001  //
2002  // The current assumption of the fast register allocator is that isel
2003  // won't generate explicit references to the GR8_NOREX registers. If
2004  // the allocator and/or the backend get enhanced to be more robust in
2005  // that regard, this can be, and should be, removed.
2006  unsigned ResultReg = 0;
2007  if ((I->getOpcode() == Instruction::SRem ||
2008  I->getOpcode() == Instruction::URem) &&
2009  OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
2010  Register SourceSuperReg = createResultReg(&X86::GR16RegClass);
2011  Register ResultSuperReg = createResultReg(&X86::GR16RegClass);
2012  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2013  TII.get(Copy), SourceSuperReg).addReg(X86::AX);
2014 
2015  // Shift AX right by 8 bits instead of using AH.
2016  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
2017  ResultSuperReg).addReg(SourceSuperReg).addImm(8);
2018 
2019  // Now reference the 8-bit subreg of the result.
2020  ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
2021  X86::sub_8bit);
2022  }
2023  // Copy the result out of the physreg if we haven't already.
2024  if (!ResultReg) {
2025  ResultReg = createResultReg(TypeEntry.RC);
2026  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
2027  .addReg(OpEntry.DivRemResultReg);
2028  }
2029  updateValueMap(I, ResultReg);
2030 
2031  return true;
2032 }
2033 
2034 /// Emit a conditional move instruction (if they are supported) to lower
2035 /// the select.
2036 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
2037  // Check if the subtarget supports these instructions.
2038  if (!Subtarget->hasCMov())
2039  return false;
2040 
2041  // FIXME: Add support for i8.
2042  if (RetVT < MVT::i16 || RetVT > MVT::i64)
2043  return false;
2044 
2045  const Value *Cond = I->getOperand(0);
2046  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2047  bool NeedTest = true;
2048  X86::CondCode CC = X86::COND_NE;
2049 
2050  // Optimize conditions coming from a compare if both instructions are in the
2051  // same basic block (values defined in other basic blocks may not have
2052  // initialized registers).
2053  const auto *CI = dyn_cast<CmpInst>(Cond);
2054  if (CI && (CI->getParent() == I->getParent())) {
2055  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2056 
2057  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
2058  static const uint16_t SETFOpcTable[2][3] = {
2059  { X86::COND_NP, X86::COND_E, X86::TEST8rr },
2060  { X86::COND_P, X86::COND_NE, X86::OR8rr }
2061  };
2062  const uint16_t *SETFOpc = nullptr;
2063  switch (Predicate) {
2064  default: break;
2065  case CmpInst::FCMP_OEQ:
2066  SETFOpc = &SETFOpcTable[0][0];
2067  Predicate = CmpInst::ICMP_NE;
2068  break;
2069  case CmpInst::FCMP_UNE:
2070  SETFOpc = &SETFOpcTable[1][0];
2071  Predicate = CmpInst::ICMP_NE;
2072  break;
2073  }
2074 
2075  bool NeedSwap;
2076  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
2077  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
2078 
2079  const Value *CmpLHS = CI->getOperand(0);
2080  const Value *CmpRHS = CI->getOperand(1);
2081  if (NeedSwap)
2082  std::swap(CmpLHS, CmpRHS);
2083 
2084  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2085  // Emit a compare of the LHS and RHS, setting the flags.
2086  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2087  return false;
2088 
2089  if (SETFOpc) {
2090  Register FlagReg1 = createResultReg(&X86::GR8RegClass);
2091  Register FlagReg2 = createResultReg(&X86::GR8RegClass);
2092  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2093  FlagReg1).addImm(SETFOpc[0]);
2094  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2095  FlagReg2).addImm(SETFOpc[1]);
2096  auto const &II = TII.get(SETFOpc[2]);
2097  if (II.getNumDefs()) {
2098  Register TmpReg = createResultReg(&X86::GR8RegClass);
2099  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
2100  .addReg(FlagReg2).addReg(FlagReg1);
2101  } else {
2102  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2103  .addReg(FlagReg2).addReg(FlagReg1);
2104  }
2105  }
2106  NeedTest = false;
2107  } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
2108  // Fake request the condition, otherwise the intrinsic might be completely
2109  // optimized away.
2110  Register TmpReg = getRegForValue(Cond);
2111  if (TmpReg == 0)
2112  return false;
2113 
2114  NeedTest = false;
2115  }
2116 
2117  if (NeedTest) {
2118  // Selects operate on i1; however, CondReg is 8 bits wide and may contain
2119  // garbage. Indeed, only the least significant bit is supposed to be
2120  // accurate. If we read more than the lsb, we may see non-zero values
2121  // whereas the lsb is zero. Therefore, we have to truncate Op0Reg to i1 for
2122  // the select. This is achieved by performing TEST against 1.
2123  Register CondReg = getRegForValue(Cond);
2124  if (CondReg == 0)
2125  return false;
2126 
2127  // In case OpReg is a K register, COPY to a GPR
2128  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2129  unsigned KCondReg = CondReg;
2130  CondReg = createResultReg(&X86::GR32RegClass);
2131  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2132  TII.get(TargetOpcode::COPY), CondReg)
2133  .addReg(KCondReg);
2134  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);
2135  }
2136  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2137  .addReg(CondReg)
2138  .addImm(1);
2139  }
2140 
2141  const Value *LHS = I->getOperand(1);
2142  const Value *RHS = I->getOperand(2);
2143 
2144  Register RHSReg = getRegForValue(RHS);
2145  Register LHSReg = getRegForValue(LHS);
2146  if (!LHSReg || !RHSReg)
2147  return false;
2148 
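// The condition is now in EFLAGS, so a single CMOV of the right width
// suffices; e.g. "select i1 %c, i64 %a, i64 %b" ends up roughly as:
//   TEST8ri %c, 1
//   %r = CMOV64rr %b, %a, COND_NE   ; %a when the condition holds, else %b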
2149  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
2150  unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(*RC)/8);
2151  Register ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
2152  updateValueMap(I, ResultReg);
2153  return true;
2154 }
2155 
2156 /// Emit SSE or AVX instructions to lower the select.
2157 ///
2158 /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
2159 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
2160 /// SSE instructions are available. If AVX is available, try to use a VBLENDV.
2161 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
2162  // Optimize conditions coming from a compare if both instructions are in the
2163  // same basic block (values defined in other basic blocks may not have
2164  // initialized registers).
2165  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
2166  if (!CI || (CI->getParent() != I->getParent()))
2167  return false;
2168 
2169  if (I->getType() != CI->getOperand(0)->getType() ||
2170  !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
2171  (Subtarget->hasSSE2() && RetVT == MVT::f64)))
2172  return false;
2173 
2174  const Value *CmpLHS = CI->getOperand(0);
2175  const Value *CmpRHS = CI->getOperand(1);
2176  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2177 
2178  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
2179  // We don't have to materialize a zero constant for this case and can just use
2180  // %x again on the RHS.
2181  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
2182  const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
2183  if (CmpRHSC && CmpRHSC->isNullValue())
2184  CmpRHS = CmpLHS;
2185  }
2186 
2187  unsigned CC;
2188  bool NeedSwap;
2189  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
2190  if (CC > 7 && !Subtarget->hasAVX())
2191  return false;
2192 
2193  if (NeedSwap)
2194  std::swap(CmpLHS, CmpRHS);
2195 
2196  const Value *LHS = I->getOperand(1);
2197  const Value *RHS = I->getOperand(2);
2198 
2199  Register LHSReg = getRegForValue(LHS);
2200  Register RHSReg = getRegForValue(RHS);
2201  Register CmpLHSReg = getRegForValue(CmpLHS);
2202  Register CmpRHSReg = getRegForValue(CmpRHS);
2203  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
2204  return false;
2205 
2206  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2207  unsigned ResultReg;
2208 
2209  if (Subtarget->hasAVX512()) {
2210  // If we have AVX512 we can use a mask compare and masked movss/sd.
2211  const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
2212  const TargetRegisterClass *VK1 = &X86::VK1RegClass;
2213 
2214  unsigned CmpOpcode =
2215  (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
2216  Register CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpRHSReg,
2217  CC);
2218 
2219  // Need an IMPLICIT_DEF for the input that is used to generate the upper
2220  // bits of the result register since it's not based on any of the inputs.
2221  Register ImplicitDefReg = createResultReg(VR128X);
2222  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2223  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2224 
2225  // Place RHSReg in the passthru of the masked movss/sd operation and put
2226  // LHS in the input. The mask input comes from the compare.
2227  unsigned MovOpcode =
2228  (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
2229  unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, CmpReg,
2230  ImplicitDefReg, LHSReg);
2231 
2232  ResultReg = createResultReg(RC);
2233  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2234  TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
2235 
2236  } else if (Subtarget->hasAVX()) {
2237  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2238 
2239  // If we have AVX, create 1 blendv instead of 3 logic instructions.
2240  // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
2241  // uses XMM0 as the selection register. That may need just as many
2242  // instructions as the AND/ANDN/OR sequence due to register moves, so
2243  // don't bother.
2244  unsigned CmpOpcode =
2245  (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
2246  unsigned BlendOpcode =
2247  (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
2248 
2249  Register CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpRHSReg,
2250  CC);
2251  Register VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, LHSReg,
2252  CmpReg);
2253  ResultReg = createResultReg(RC);
2254  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2255  TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
2256  } else {
2257  // Choose the SSE instruction sequence based on data type (float or double).
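// Without AVX the select is simulated with a bitmask, roughly:
//   mask   = CMPSS %cmplhs, %cmprhs, cc   ; all-ones or all-zeros
//   result = (mask & tval) | (~mask & fval)
// using the ANDPS/ANDNPS/ORPS (or PD) instructions chosen below.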
2258  static const uint16_t OpcTable[2][4] = {
2259  { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
2260  { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
2261  };
2262 
2263  const uint16_t *Opc = nullptr;
2264  switch (RetVT.SimpleTy) {
2265  default: return false;
2266  case MVT::f32: Opc = &OpcTable[0][0]; break;
2267  case MVT::f64: Opc = &OpcTable[1][0]; break;
2268  }
2269 
2270  const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2271  Register CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpRHSReg, CC);
2272  Register AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, LHSReg);
2273  Register AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, RHSReg);
2274  Register OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, AndReg);
2275  ResultReg = createResultReg(RC);
2276  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2277  TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
2278  }
2279  updateValueMap(I, ResultReg);
2280  return true;
2281 }
2282 
2283 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
2284  // These are pseudo CMOV instructions and will be later expanded into control-
2285  // flow.
2286  unsigned Opc;
2287  switch (RetVT.SimpleTy) {
2288  default: return false;
2289  case MVT::i8: Opc = X86::CMOV_GR8; break;
2290  case MVT::i16: Opc = X86::CMOV_GR16; break;
2291  case MVT::f16: Opc = X86::CMOV_FR16X; break;
2292  case MVT::i32: Opc = X86::CMOV_GR32; break;
2293  case MVT::f32: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X
2294  : X86::CMOV_FR32; break;
2295  case MVT::f64: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X
2296  : X86::CMOV_FR64; break;
2297  }
2298 
2299  const Value *Cond = I->getOperand(0);
2300  X86::CondCode CC = X86::COND_NE;
2301 
2302  // Optimize conditions coming from a compare if both instructions are in the
2303  // same basic block (values defined in other basic blocks may not have
2304  // initialized registers).
2305  const auto *CI = dyn_cast<CmpInst>(Cond);
2306  if (CI && (CI->getParent() == I->getParent())) {
2307  bool NeedSwap;
2308  std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
2309  if (CC > X86::LAST_VALID_COND)
2310  return false;
2311 
2312  const Value *CmpLHS = CI->getOperand(0);
2313  const Value *CmpRHS = CI->getOperand(1);
2314 
2315  if (NeedSwap)
2316  std::swap(CmpLHS, CmpRHS);
2317 
2318  EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2319  if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2320  return false;
2321  } else {
2322  Register CondReg = getRegForValue(Cond);
2323  if (CondReg == 0)
2324  return false;
2325 
2326  // In case OpReg is a K register, COPY to a GPR
2327  if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2328  unsigned KCondReg = CondReg;
2329  CondReg = createResultReg(&X86::GR32RegClass);
2330  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2331  TII.get(TargetOpcode::COPY), CondReg)
2332  .addReg(KCondReg);
2333  CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);
2334  }
2335  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2336  .addReg(CondReg)
2337  .addImm(1);
2338  }
2339 
2340  const Value *LHS = I->getOperand(1);
2341  const Value *RHS = I->getOperand(2);
2342 
2343  Register LHSReg = getRegForValue(LHS);
2344  Register RHSReg = getRegForValue(RHS);
2345  if (!LHSReg || !RHSReg)
2346  return false;
2347 
2348  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2349 
2350  Register ResultReg =
2351  fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
2352  updateValueMap(I, ResultReg);
2353  return true;
2354 }
2355 
2356 bool X86FastISel::X86SelectSelect(const Instruction *I) {
2357  MVT RetVT;
2358  if (!isTypeLegal(I->getType(), RetVT))
2359  return false;
2360 
2361  // Check if we can fold the select.
2362  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2363  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2364  const Value *Opnd = nullptr;
2365  switch (Predicate) {
2366  default: break;
2367  case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
2368  case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
2369  }
2370  // No need for a select anymore - this is an unconditional move.
2371  if (Opnd) {
2372  Register OpReg = getRegForValue(Opnd);
2373  if (OpReg == 0)
2374  return false;
2375  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2376  Register ResultReg = createResultReg(RC);
2377  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2378  TII.get(TargetOpcode::COPY), ResultReg)
2379  .addReg(OpReg);
2380  updateValueMap(I, ResultReg);
2381  return true;
2382  }
2383  }
2384 
2385  // First try to use real conditional move instructions.
2386  if (X86FastEmitCMoveSelect(RetVT, I))
2387  return true;
2388 
2389  // Try to use a sequence of SSE instructions to simulate a conditional move.
2390  if (X86FastEmitSSESelect(RetVT, I))
2391  return true;
2392 
2393  // Fall back to pseudo conditional move instructions, which will later be
2394  // converted to control-flow.
2395  if (X86FastEmitPseudoSelect(RetVT, I))
2396  return true;
2397 
2398  return false;
2399 }
2400 
2401 // Common code for X86SelectSIToFP and X86SelectUIToFP.
2402 bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
2403  // The target-independent selection algorithm in FastISel already knows how
2404  // to select a SINT_TO_FP if the target is SSE but not AVX.
2405  // Early exit if the subtarget doesn't have AVX.
2406  // Unsigned conversion requires avx512.
2407  bool HasAVX512 = Subtarget->hasAVX512();
2408  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
2409  return false;
2410 
2411  // TODO: We could sign extend narrower types.
2412  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
2413  if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
2414  return false;
2415 
2416  // Select integer to float/double conversion.
2417  Register OpReg = getRegForValue(I->getOperand(0));
2418  if (OpReg == 0)
2419  return false;
2420 
2421  unsigned Opcode;
2422 
2423  static const uint16_t SCvtOpc[2][2][2] = {
2424  { { X86::VCVTSI2SSrr, X86::VCVTSI642SSrr },
2425  { X86::VCVTSI2SDrr, X86::VCVTSI642SDrr } },
2426  { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
2427  { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
2428  };
2429  static const uint16_t UCvtOpc[2][2] = {
2430  { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
2431  { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
2432  };
2433  bool Is64Bit = SrcVT == MVT::i64;
2434 
2435  if (I->getType()->isDoubleTy()) {
2436  // s/uitofp int -> double
2437  Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
2438  } else if (I->getType()->isFloatTy()) {
2439  // s/uitofp int -> float
2440  Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
2441  } else
2442  return false;
2443 
2444  MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
2445  const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
2446  Register ImplicitDefReg = createResultReg(RC);
2447  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2448  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2449  Register ResultReg = fastEmitInst_rr(Opcode, RC, ImplicitDefReg, OpReg);
2450  updateValueMap(I, ResultReg);
2451  return true;
2452 }
2453 
2454 bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
2455  return X86SelectIntToFP(I, /*IsSigned*/true);
2456 }
2457 
2458 bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
2459  return X86SelectIntToFP(I, /*IsSigned*/false);
2460 }
2461 
2462 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
2463 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
2464  unsigned TargetOpc,
2465  const TargetRegisterClass *RC) {
2466  assert((I->getOpcode() == Instruction::FPExt ||
2467  I->getOpcode() == Instruction::FPTrunc) &&
2468  "Instruction must be an FPExt or FPTrunc!");
2469  bool HasAVX = Subtarget->hasAVX();
2470 
2471  Register OpReg = getRegForValue(I->getOperand(0));
2472  if (OpReg == 0)
2473  return false;
2474 
2475  unsigned ImplicitDefReg;
2476  if (HasAVX) {
2477  ImplicitDefReg = createResultReg(RC);
2478  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2479  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2480 
2481  }
2482 
2483  Register ResultReg = createResultReg(RC);
2484  MachineInstrBuilder MIB;
2485  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
2486  ResultReg);
2487 
2488  if (HasAVX)
2489  MIB.addReg(ImplicitDefReg);
2490 
2491  MIB.addReg(OpReg);
2492  updateValueMap(I, ResultReg);
2493  return true;
2494 }
2495 
2496 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2497  if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
2498  I->getOperand(0)->getType()->isFloatTy()) {
2499  bool HasAVX512 = Subtarget->hasAVX512();
2500  // fpext from float to double.
2501  unsigned Opc =
2502  HasAVX512 ? X86::VCVTSS2SDZrr
2503  : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
2504  return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f64));
2505  }
2506 
2507  return false;
2508 }
2509 
2510 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2511  if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
2512  I->getOperand(0)->getType()->isDoubleTy()) {
2513  bool HasAVX512 = Subtarget->hasAVX512();
2514  // fptrunc from double to float.
2515  unsigned Opc =
2516  HasAVX512 ? X86::VCVTSD2SSZrr
2517  : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
2518  return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f32));
2519  }
2520 
2521  return false;
2522 }
2523 
2524 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2525  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
2526  EVT DstVT = TLI.getValueType(DL, I->getType());
2527 
2528  // This code only handles truncation to byte.
2529  if (DstVT != MVT::i8 && DstVT != MVT::i1)
2530  return false;
2531  if (!TLI.isTypeLegal(SrcVT))
2532  return false;
2533 
2534  Register InputReg = getRegForValue(I->getOperand(0));
2535  if (!InputReg)
2536  // Unhandled operand. Halt "fast" selection and bail.
2537  return false;
2538 
2539  if (SrcVT == MVT::i8) {
2540  // Truncate from i8 to i1; no code needed.
2541  updateValueMap(I, InputReg);
2542  return true;
2543  }
2544 
2545  // Issue an extract_subreg.
2546  Register ResultReg = fastEmitInst_extractsubreg(MVT::i8, InputReg,
2547  X86::sub_8bit);
2548  if (!ResultReg)
2549  return false;
2550 
2551  updateValueMap(I, ResultReg);
2552  return true;
2553 }
2554 
2555 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2556  return Len <= (Subtarget->is64Bit() ? 32 : 16);
2557 }
2558 
2559 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
2560  X86AddressMode SrcAM, uint64_t Len) {
2561 
2562  // Make sure we don't bloat code by inlining very large memcpy's.
2563  if (!IsMemcpySmall(Len))
2564  return false;
2565 
2566  bool i64Legal = Subtarget->is64Bit();
2567 
2568  // We don't care about alignment here since we just emit integer accesses.
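// For example, a 7-byte memcpy on x86-64 is lowered to an i32, an i16 and an
// i8 load/store pair, bumping both displacements after each chunk.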
2569  while (Len) {
2570  MVT VT;
2571  if (Len >= 8 && i64Legal)
2572  VT = MVT::i64;
2573  else if (Len >= 4)
2574  VT = MVT::i32;
2575  else if (Len >= 2)
2576  VT = MVT::i16;
2577  else
2578  VT = MVT::i8;
2579 
2580  unsigned Reg;
2581  bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2582  RV &= X86FastEmitStore(VT, Reg, DestAM);
2583  assert(RV && "Failed to emit load or store??");
2584  (void)RV;
2585 
2586  unsigned Size = VT.getSizeInBits()/8;
2587  Len -= Size;
2588  DestAM.Disp += Size;
2589  SrcAM.Disp += Size;
2590  }
2591 
2592  return true;
2593 }
2594 
2595 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2596  // FIXME: Handle more intrinsics.
2597  switch (II->getIntrinsicID()) {
2598  default: return false;
2599  case Intrinsic::convert_from_fp16:
2600  case Intrinsic::convert_to_fp16: {
2601  if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
2602  return false;
2603 
2604  const Value *Op = II->getArgOperand(0);
2605  Register InputReg = getRegForValue(Op);
2606  if (InputReg == 0)
2607  return false;
2608 
2609  // F16C only allows converting from float to half and from half to float.
2610  bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
2611  if (IsFloatToHalf) {
2612  if (!Op->getType()->isFloatTy())
2613  return false;
2614  } else {
2615  if (!II->getType()->isFloatTy())
2616  return false;
2617  }
2618 
2619  unsigned ResultReg = 0;
2620  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
2621  if (IsFloatToHalf) {
2622  // 'InputReg' is implicitly promoted from register class FR32 to
2623  // register class VR128 by method 'constrainOperandRegClass' which is
2624  // directly called by 'fastEmitInst_ri'.
2625  // Instruction VCVTPS2PHrr takes an extra immediate operand which is
2626  // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
2627  // It's consistent with the other FP instructions, which are usually
2628  // controlled by MXCSR.
2629  unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPS2PHZ128rr
2630  : X86::VCVTPS2PHrr;
2631  InputReg = fastEmitInst_ri(Opc, RC, InputReg, 4);
2632 
2633  // Move the lower 32-bits of ResultReg to another register of class GR32.
2634  Opc = Subtarget->hasAVX512() ? X86::VMOVPDI2DIZrr
2635  : X86::VMOVPDI2DIrr;
2636  ResultReg = createResultReg(&X86::GR32RegClass);
2637  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2638  .addReg(InputReg, RegState::Kill);
2639 
2640  // The result value is in the lower 16-bits of ResultReg.
2641  unsigned RegIdx = X86::sub_16bit;
2642  ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, RegIdx);
2643  } else {
2644  assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
2645  // Explicitly zero-extend the input to 32-bit.
2646  InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::ZERO_EXTEND, InputReg);
2647 
2648  // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
2649  InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
2650  InputReg);
2651 
2652  unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPH2PSZ128rr
2653  : X86::VCVTPH2PSrr;
2654  InputReg = fastEmitInst_r(Opc, RC, InputReg);
2655 
2656  // The result value is in the lower 32-bits of ResultReg.
2657  // Emit an explicit copy from register class VR128 to register class FR32.
2658  ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
2659  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2660  TII.get(TargetOpcode::COPY), ResultReg)
2661  .addReg(InputReg, RegState::Kill);
2662  }
2663 
2664  updateValueMap(II, ResultReg);
2665  return true;
2666  }
2667  case Intrinsic::frameaddress: {
2668  MachineFunction *MF = FuncInfo.MF;
2669  if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
2670  return false;
2671 
2672  Type *RetTy = II->getCalledFunction()->getReturnType();
2673 
2674  MVT VT;
2675  if (!isTypeLegal(RetTy, VT))
2676  return false;
2677 
2678  unsigned Opc;
2679  const TargetRegisterClass *RC = nullptr;
2680 
2681  switch (VT.SimpleTy) {
2682  default: llvm_unreachable("Invalid result type for frameaddress.");
2683  case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2684  case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2685  }
2686 
2687  // This needs to be set before we call getPtrSizedFrameRegister, otherwise
2688  // we get the wrong frame register.
2689  MachineFrameInfo &MFI = MF->getFrameInfo();
2690  MFI.setFrameAddressIsTaken(true);
2691 
2692  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2693  unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
2694  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
2695  (FrameReg == X86::EBP && VT == MVT::i32)) &&
2696  "Invalid Frame Register!");
2697 
2698  // Always make a copy of the frame register to a vreg first, so that we
2699  // never directly reference the frame register (the TwoAddressInstruction-
2700  // Pass doesn't like that).
2701  Register SrcReg = createResultReg(RC);
2702  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2703  TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2704 
2705  // Now recursively load from the frame address.
2706  // movq (%rbp), %rax
2707  // movq (%rax), %rax
2708  // movq (%rax), %rax
2709  // ...
2710  unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2711  while (Depth--) {
2712  Register DestReg = createResultReg(RC);
2713  addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2714  TII.get(Opc), DestReg), SrcReg);
2715  SrcReg = DestReg;
2716  }
2717 
2718  updateValueMap(II, SrcReg);
2719  return true;
2720  }
2721  case Intrinsic::memcpy: {
2722  const MemCpyInst *MCI = cast<MemCpyInst>(II);
2723  // Don't handle volatile or variable length memcpys.
2724  if (MCI->isVolatile())
2725  return false;
2726 
2727  if (isa<ConstantInt>(MCI->getLength())) {
2728  // Small memcpy's are common enough that we want to do them
2729  // without a call if possible.
2730  uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
2731  if (IsMemcpySmall(Len)) {
2732  X86AddressMode DestAM, SrcAM;
2733  if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
2734  !X86SelectAddress(MCI->getRawSource(), SrcAM))
2735  return false;
2736  TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2737  return true;
2738  }
2739  }
2740 
2741  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2742  if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
2743  return false;
2744 
2745  if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
2746  return false;
2747 
2748  return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1);
2749  }
2750  case Intrinsic::memset: {
2751  const MemSetInst *MSI = cast<MemSetInst>(II);
2752 
2753  if (MSI->isVolatile())
2754  return false;
2755 
2756  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2757  if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
2758  return false;
2759 
2760  if (MSI->getDestAddressSpace() > 255)
2761  return false;
2762 
2763  return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
2764  }
2765  case Intrinsic::stackprotector: {
2766  // Emit code to store the stack guard onto the stack.
2767  EVT PtrTy = TLI.getPointerTy(DL);
2768 
2769  const Value *Op1 = II->getArgOperand(0); // The guard's value.
2770  const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
2771 
2772  MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
2773 
2774  // Grab the frame index.
2775  X86AddressMode AM;
2776  if (!X86SelectAddress(Slot, AM)) return false;
2777  if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2778  return true;
2779  }
2780  case Intrinsic::dbg_declare: {
2781  const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
2782  X86AddressMode AM;
2783  assert(DI->getAddress() && "Null address should be checked earlier!");
2784  if (!X86SelectAddress(DI->getAddress(), AM))
2785  return false;
2786  const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
2787  // FIXME may need to add RegState::Debug to any registers produced,
2788  // although ESP/EBP should be the only ones at the moment.
2790  "Expected inlined-at fields to agree");
2791  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
2792  .addImm(0)
2793  .addMetadata(DI->getVariable())
2794  .addMetadata(DI->getExpression());
2795  return true;
2796  }
2797  case Intrinsic::trap: {
2798  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
2799  return true;
2800  }
2801  case Intrinsic::sqrt: {
2802  if (!Subtarget->hasSSE1())
2803  return false;
2804 
2805  Type *RetTy = II->getCalledFunction()->getReturnType();
2806 
2807  MVT VT;
2808  if (!isTypeLegal(RetTy, VT))
2809  return false;
2810 
2811  // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
2812  // is not generated by FastISel yet.
2813  // FIXME: Update this code once tablegen can handle it.
2814  static const uint16_t SqrtOpc[3][2] = {
2815  { X86::SQRTSSr, X86::SQRTSDr },
2816  { X86::VSQRTSSr, X86::VSQRTSDr },
2817  { X86::VSQRTSSZr, X86::VSQRTSDZr },
2818  };
2819  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
2820  Subtarget->hasAVX() ? 1 :
2821  0;
2822  unsigned Opc;
2823  switch (VT.SimpleTy) {
2824  default: return false;
2825  case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
2826  case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
2827  }
2828 
2829  const Value *SrcVal = II->getArgOperand(0);
2830  Register SrcReg = getRegForValue(SrcVal);
2831 
2832  if (SrcReg == 0)
2833  return false;
2834 
2835  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
2836  unsigned ImplicitDefReg = 0;
2837  if (AVXLevel > 0) {
2838  ImplicitDefReg = createResultReg(RC);
2839  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2840  TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2841  }
2842 
2843  Register ResultReg = createResultReg(RC);
2844  MachineInstrBuilder MIB;
2845  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
2846  ResultReg);
2847 
2848  if (ImplicitDefReg)
2849  MIB.addReg(ImplicitDefReg);
2850 
2851  MIB.addReg(SrcReg);
2852 
2853  updateValueMap(II, ResultReg);
2854  return true;
2855  }
2856  case Intrinsic::sadd_with_overflow:
2857  case Intrinsic::uadd_with_overflow:
2858  case Intrinsic::ssub_with_overflow:
2859  case Intrinsic::usub_with_overflow:
2860  case Intrinsic::smul_with_overflow:
2861  case Intrinsic::umul_with_overflow: {
2862  // This implements the basic lowering of the xalu with overflow intrinsics
2863  // into add/sub/mul followed by either seto or setb.
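// For example, "llvm.uadd.with.overflow.i32(%a, %b)" becomes roughly:
//   %sum = ADD32rr %a, %b
//   %ovf = SETCC COND_B      ; carry set means unsigned overflow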
2864  const Function *Callee = II->getCalledFunction();
2865  auto *Ty = cast<StructType>(Callee->getReturnType());
2866  Type *RetTy = Ty->getTypeAtIndex(0U);
2867  assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
2868  Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
2869  "Overflow value expected to be an i1");
2870 
2871  MVT VT;
2872  if (!isTypeLegal(RetTy, VT))
2873  return false;
2874 
2875  if (VT < MVT::i8 || VT > MVT::i64)
2876  return false;
2877 
2878  const Value *LHS = II->getArgOperand(0);
2879  const Value *RHS = II->getArgOperand(1);
2880 
2881  // Canonicalize immediate to the RHS.
2882  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
2883  std::swap(LHS, RHS);
2884 
2885  unsigned BaseOpc, CondCode;
2886  switch (II->getIntrinsicID()) {
2887  default: llvm_unreachable("Unexpected intrinsic!");
2888  case Intrinsic::sadd_with_overflow:
2889  BaseOpc = ISD::ADD; CondCode = X86::COND_O; break;
2890  case Intrinsic::uadd_with_overflow:
2891  BaseOpc = ISD::ADD; CondCode = X86::COND_B; break;
2892  case Intrinsic::ssub_with_overflow:
2893  BaseOpc = ISD::SUB; CondCode = X86::COND_O; break;
2894  case Intrinsic::usub_with_overflow:
2895  BaseOpc = ISD::SUB; CondCode = X86::COND_B; break;
2896  case Intrinsic::smul_with_overflow:
2897  BaseOpc = X86ISD::SMUL; CondCode = X86::COND_O; break;
2898  case Intrinsic::umul_with_overflow:
2899  BaseOpc = X86ISD::UMUL; CondCode = X86::COND_O; break;
2900  }
2901 
2902  Register LHSReg = getRegForValue(LHS);
2903  if (LHSReg == 0)
2904  return false;
2905 
2906  unsigned ResultReg = 0;
2907  // Check if we have an immediate version.
2908  if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
2909  static const uint16_t Opc[2][4] = {
2910  { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
2911  { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
2912  };
2913 
2914  if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
2915  CondCode == X86::COND_O) {
2916  // We can use INC/DEC.
2917  ResultReg = createResultReg(TLI.getRegClassFor(VT));
2918  bool IsDec = BaseOpc == ISD::SUB;
2919  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2920  TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
2921  .addReg(LHSReg);
2922  } else
2923  ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, CI->getZExtValue());
2924  }
2925 
2926  unsigned RHSReg;
2927  if (!ResultReg) {
2928  RHSReg = getRegForValue(RHS);
2929  if (RHSReg == 0)
2930  return false;
2931  ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, RHSReg);
2932  }
2933 
2934  // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
2935  // it manually.
2936  if (BaseOpc == X86ISD::UMUL && !ResultReg) {
2937  static const uint16_t MULOpc[] =
2938  { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2939  static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
2940  // First copy the first operand into RAX, which is an implicit input to
2941  // the X86::MUL*r instruction.
2942  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2943  TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2944  .addReg(LHSReg);
2945  ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2946  TLI.getRegClassFor(VT), RHSReg);
2947  } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
2948  static const uint16_t MULOpc[] =
2949  { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2950  if (VT == MVT::i8) {
2951  // Copy the first operand into AL, which is an implicit input to the
2952  // X86::IMUL8r instruction.
2953  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2954  TII.get(TargetOpcode::COPY), X86::AL)
2955  .addReg(LHSReg);
2956  ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg);
2957  } else
2958  ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2959  TLI.getRegClassFor(VT), LHSReg, RHSReg);
2960  }
2961 
2962  if (!ResultReg)
2963  return false;
2964 
2965  // Assign to a GPR since the overflow return value is lowered to a SETcc.
2966  Register ResultReg2 = createResultReg(&X86::GR8RegClass);
2967  assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2968  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
2969  ResultReg2).addImm(CondCode);
2970 
2971  updateValueMap(II, ResultReg, 2);
2972  return true;
2973  }
2974  case Intrinsic::x86_sse_cvttss2si:
2975  case Intrinsic::x86_sse_cvttss2si64:
2976  case Intrinsic::x86_sse2_cvttsd2si:
2977  case Intrinsic::x86_sse2_cvttsd2si64: {
2978  bool IsInputDouble;
2979  switch (II->getIntrinsicID()) {
2980  default: llvm_unreachable("Unexpected intrinsic.");
2981  case Intrinsic::x86_sse_cvttss2si:
2982  case Intrinsic::x86_sse_cvttss2si64:
2983  if (!Subtarget->hasSSE1())
2984  return false;
2985  IsInputDouble = false;
2986  break;
2987  case Intrinsic::x86_sse2_cvttsd2si:
2988  case Intrinsic::x86_sse2_cvttsd2si64:
2989  if (!Subtarget->hasSSE2())
2990  return false;
2991  IsInputDouble = true;
2992  break;
2993  }
2994 
2995  Type *RetTy = II->getCalledFunction()->getReturnType();
2996  MVT VT;
2997  if (!isTypeLegal(RetTy, VT))
2998  return false;
2999 
3000  static const uint16_t CvtOpc[3][2][2] = {
3001  { { X86::CVTTSS2SIrr, X86::CVTTSS2SI64rr },
3002  { X86::CVTTSD2SIrr, X86::CVTTSD2SI64rr } },
3003  { { X86::VCVTTSS2SIrr, X86::VCVTTSS2SI64rr },
3004  { X86::VCVTTSD2SIrr, X86::VCVTTSD2SI64rr } },
3005  { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
3006  { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
3007  };
3008  unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
3009  Subtarget->hasAVX() ? 1 :
3010  0;
3011  unsigned Opc;
3012  switch (VT.SimpleTy) {
3013  default: llvm_unreachable("Unexpected result type.");
3014  case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
3015  case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
3016  }
3017 
3018  // Check if we can fold insertelement instructions into the convert.
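// e.g. "cvttss2si(insertelement <4 x float> undef, float %f, i32 0)" only
// reads lane 0, so the conversion can use %f directly.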
3019  const Value *Op = II->getArgOperand(0);
3020  while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
3021  const Value *Index = IE->getOperand(2);
3022  if (!isa<ConstantInt>(Index))
3023  break;
3024  unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
3025 
3026  if (Idx == 0) {
3027  Op = IE->getOperand(1);
3028  break;
3029  }
3030  Op = IE->getOperand(0);
3031  }
3032 
3033  Register Reg = getRegForValue(Op);
3034  if (Reg == 0)
3035  return false;
3036 
3037  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3038  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3039  .addReg(Reg);
3040 
3041  updateValueMap(II, ResultReg);
3042  return true;
3043  }
3044  }
3045 }
3046 
3047 bool X86FastISel::fastLowerArguments() {
3048  if (!FuncInfo.CanLowerReturn)
3049  return false;
3050 
3051  const Function *F = FuncInfo.Fn;
3052  if (F->isVarArg())
3053  return false;
3054 
3055  CallingConv::ID CC = F->getCallingConv();
3056  if (CC != CallingConv::C)
3057  return false;
3058 
3059  if (Subtarget->isCallingConvWin64(CC))
3060  return false;
3061 
3062  if (!Subtarget->is64Bit())
3063  return false;
3064 
3065  if (Subtarget->useSoftFloat())
3066  return false;
3067 
3068  // Only handle simple cases, i.e. up to 6 i32/i64 and 8 f32/f64 scalar arguments.
3069  unsigned GPRCnt = 0;
3070  unsigned FPRCnt = 0;
3071  for (auto const &Arg : F->args()) {
3072  if (Arg.hasAttribute(Attribute::ByVal) ||
3073  Arg.hasAttribute(Attribute::InReg) ||
3074  Arg.hasAttribute(Attribute::StructRet) ||
3075  Arg.hasAttribute(Attribute::SwiftSelf) ||
3076  Arg.hasAttribute(Attribute::SwiftAsync) ||
3077  Arg.hasAttribute(Attribute::SwiftError) ||
3078  Arg.hasAttribute(Attribute::Nest))
3079  return false;
3080 
3081  Type *ArgTy = Arg.getType();
3082  if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3083  return false;
3084 
3085  EVT ArgVT = TLI.getValueType(DL, ArgTy);
3086  if (!ArgVT.isSimple()) return false;
3087  switch (ArgVT.getSimpleVT().SimpleTy) {
3088  default: return false;
3089  case MVT::i32:
3090  case MVT::i64:
3091  ++GPRCnt;
3092  break;
3093  case MVT::f32:
3094  case MVT::f64:
3095  if (!Subtarget->hasSSE1())
3096  return false;
3097  ++FPRCnt;
3098  break;
3099  }
3100 
3101  if (GPRCnt > 6)
3102  return false;
3103 
3104  if (FPRCnt > 8)
3105  return false;
3106  }
3107 
3108  static const MCPhysReg GPR32ArgRegs[] = {
3109  X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
3110  };
3111  static const MCPhysReg GPR64ArgRegs[] = {
3112  X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
3113  };
3114  static const MCPhysReg XMMArgRegs[] = {
3115  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3116  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3117  };
3118 
3119  unsigned GPRIdx = 0;
3120  unsigned FPRIdx = 0;
3121  for (auto const &Arg : F->args()) {
3122  MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3123  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
3124  unsigned SrcReg;
3125  switch (VT.SimpleTy) {
3126  default: llvm_unreachable("Unexpected value type.");
3127  case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
3128  case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
3129  case MVT::f32: LLVM_FALLTHROUGH;
3130  case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
3131  }
3132  Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3133  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3134  // Without this, EmitLiveInCopies may eliminate the livein if its only
3135  // use is a bitcast (which isn't turned into an instruction).
3136  Register ResultReg = createResultReg(RC);
3137  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3138  TII.get(TargetOpcode::COPY), ResultReg)
3139  .addReg(DstReg, getKillRegState(true));
3140  updateValueMap(&Arg, ResultReg);
3141  }
3142  return true;
3143 }
3144 
3145 static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
3146  CallingConv::ID CC,
3147  const CallBase *CB) {
3148  if (Subtarget->is64Bit())
3149  return 0;
3150  if (Subtarget->getTargetTriple().isOSMSVCRT())
3151  return 0;
3152  if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3153  CC == CallingConv::HiPE || CC == CallingConv::Tail ||
3154  CC == CallingConv::SwiftTail)
3155  return 0;
3156 
3157  if (CB)
3158  if (CB->arg_empty() || !CB->paramHasAttr(0, Attribute::StructRet) ||
3159  CB->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
3160  return 0;
3161 
3162  return 4;
3163 }
3164 
3165 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3166  auto &OutVals = CLI.OutVals;
3167  auto &OutFlags = CLI.OutFlags;
3168  auto &OutRegs = CLI.OutRegs;
3169  auto &Ins = CLI.Ins;
3170  auto &InRegs = CLI.InRegs;
3171  CallingConv::ID CC = CLI.CallConv;
3172  bool &IsTailCall = CLI.IsTailCall;
3173  bool IsVarArg = CLI.IsVarArg;
3174  const Value *Callee = CLI.Callee;
3175  MCSymbol *Symbol = CLI.Symbol;
3176  const auto *CB = CLI.CB;
3177 
3178  bool Is64Bit = Subtarget->is64Bit();
3179  bool IsWin64 = Subtarget->isCallingConvWin64(CC);
3180 
3181  // Call / invoke instructions with NoCfCheck attribute require special
3182  // handling.
3183  if (CB && CB->doesNoCfCheck())
3184  return false;
3185 
3186  // Functions with the no_caller_saved_registers attribute need special handling.
3187  if ((CB && isa<CallInst>(CB) && CB->hasFnAttr("no_caller_saved_registers")))
3188  return false;
3189 
3190  // Functions with the no_callee_saved_registers attribute need special handling.
3191  if ((CB && CB->hasFnAttr("no_callee_saved_registers")))
3192  return false;
3193 
3194  // Functions using thunks for indirect calls need to use SDISel.
3195  if (Subtarget->useIndirectThunkCalls())
3196  return false;
3197 
3198  // Handle only C, fastcc, and webkit_js calling conventions for now.
3199  switch (CC) {
3200  default: return false;
3201  case CallingConv::C:
3202  case CallingConv::Fast:
3203  case CallingConv::Tail:
3204  case CallingConv::WebKit_JS:
3205  case CallingConv::Swift:
3206  case CallingConv::SwiftTail:
3207  case CallingConv::X86_FastCall:
3208  case CallingConv::X86_StdCall:
3209  case CallingConv::X86_ThisCall:
3210  case CallingConv::Win64:
3211  case CallingConv::X86_64_SysV:
3212  case CallingConv::CFGuard_Check:
3213  break;
3214  }
3215 
3216  // Allow SelectionDAG isel to handle tail calls.
3217  if (IsTailCall)
3218  return false;
3219 
3220  // fastcc with -tailcallopt is intended to provide a guaranteed
3221  // tail call optimization. Fastisel doesn't know how to do that.
3222  if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
3223  CC == CallingConv::Tail || CC == CallingConv::SwiftTail)
3224  return false;
3225 
3226  // Don't know how to handle Win64 varargs yet. Nothing special needed for
3227  // x86-32. Special handling for x86-64 is implemented.
3228  if (IsVarArg && IsWin64)
3229  return false;
3230 
3231  // Don't know about inalloca yet.
3232  if (CLI.CB && CLI.CB->hasInAllocaArgument())
3233  return false;
3234 
3235  for (auto Flag : CLI.OutFlags)
3236  if (Flag.isSwiftError() || Flag.isPreallocated())
3237  return false;
3238 
3239  SmallVector<MVT, 16> OutVTs;
3240  SmallVector<unsigned, 16> ArgRegs;
3241 
3242  // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
3243  // instruction. This is safe because it is common to all FastISel supported
3244  // calling conventions on x86.
3245  for (int i = 0, e = OutVals.size(); i != e; ++i) {
3246  Value *&Val = OutVals[i];
3247  ISD::ArgFlagsTy Flags = OutFlags[i];
3248  if (auto *CI = dyn_cast<ConstantInt>(Val)) {
3249  if (CI->getBitWidth() < 32) {
3250  if (Flags.isSExt())
3251  Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
3252  else
3253  Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
3254  }
3255  }
3256 
3257  // Passing bools around ends up doing a trunc to i1 and passing it.
3258  // Codegen this as an argument + "and 1".
3259  MVT VT;
3260  auto *TI = dyn_cast<TruncInst>(Val);
3261  unsigned ResultReg;
3262  if (TI && TI->getType()->isIntegerTy(1) && CLI.CB &&
3263  (TI->getParent() == CLI.CB->getParent()) && TI->hasOneUse()) {
3264  Value *PrevVal = TI->getOperand(0);
3265  ResultReg = getRegForValue(PrevVal);
3266 
3267  if (!ResultReg)
3268  return false;
3269 
3270  if (!isTypeLegal(PrevVal->getType(), VT))
3271  return false;
3272 
3273  ResultReg = fastEmit_ri(VT, VT, ISD::AND, ResultReg, 1);
3274  } else {
3275  if (!isTypeLegal(Val->getType(), VT) ||
3276  (VT.isVector() && VT.getVectorElementType() == MVT::i1))
3277  return false;
3278  ResultReg = getRegForValue(Val);
3279  }
3280 
3281  if (!ResultReg)
3282  return false;
3283 
3284  ArgRegs.push_back(ResultReg);
3285  OutVTs.push_back(VT);
3286  }
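// Illustrative example of the two cases handled in the loop above: a constant
// argument like `i8 7` is widened to the i32 constant 7 before being
// materialized, and a bool produced by `%t = trunc i32 %x to i1` that only
// feeds this call is lowered as the original i32 value masked with `and $1`.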
3287 
3288  // Analyze operands of the call, assigning locations to each operand.
3289  SmallVector<CCValAssign, 16> ArgLocs;
3290  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
3291 
3292  // Allocate shadow area for Win64
3293  if (IsWin64)
3294  CCInfo.AllocateStack(32, Align(8));
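 // Illustrative note: the 32 bytes reserved here are the Win64 "shadow space",
 // home slots for the four register parameters (RCX, RDX, R8, R9) that the
 // callee may spill; they are counted in the call frame even when unused.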
3295 
3296  CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
3297 
3298  // Get a count of how many bytes are to be pushed on the stack.
3299  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3300 
3301  // Issue CALLSEQ_START
3302  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3303  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3304  .addImm(NumBytes).addImm(0).addImm(0);
3305 
3306  // Walk the register/memloc assignments, inserting copies/loads.
3307  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3308  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3309  CCValAssign const &VA = ArgLocs[i];
3310  const Value *ArgVal = OutVals[VA.getValNo()];
3311  MVT ArgVT = OutVTs[VA.getValNo()];
3312 
3313  if (ArgVT == MVT::x86mmx)
3314  return false;
3315 
3316  unsigned ArgReg = ArgRegs[VA.getValNo()];
3317 
3318  // Promote the value if needed.
3319  switch (VA.getLocInfo()) {
3320  case CCValAssign::Full: break;
3321  case CCValAssign::SExt: {
3322  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3323  "Unexpected extend");
3324 
3325  if (ArgVT == MVT::i1)
3326  return false;
3327 
3328  bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3329  ArgVT, ArgReg);
3330  assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
3331  ArgVT = VA.getLocVT();
3332  break;
3333  }
3334  case CCValAssign::ZExt: {
3335  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3336  "Unexpected extend");
3337 
3338  // Handle zero-extension from i1 to i8, which is common.
3339  if (ArgVT == MVT::i1) {
3340  // Set the high bits to zero.
3341  ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg);
3342  ArgVT = MVT::i8;
3343 
3344  if (ArgReg == 0)
3345  return false;
3346  }
3347 
3348  bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3349  ArgVT, ArgReg);
3350  assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
3351  ArgVT = VA.getLocVT();
3352  break;
3353  }
3354  case CCValAssign::AExt: {
3355  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3356  "Unexpected extend");
3357  bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
3358  ArgVT, ArgReg);
3359  if (!Emitted)
3360  Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3361  ArgVT, ArgReg);
3362  if (!Emitted)
3363  Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3364  ArgVT, ArgReg);
3365 
3366  assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
3367  ArgVT = VA.getLocVT();
3368  break;
3369  }
3370  case CCValAssign::BCvt: {
3371  ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg);
3372  assert(ArgReg && "Failed to emit a bitcast!");
3373  ArgVT = VA.getLocVT();
3374  break;
3375  }
3376  case CCValAssign::VExt:
3377  // VExt has not been implemented, so this should be impossible to reach
3378  // for now. However, fall back to Selection DAG isel once it is implemented.
3379  return false;
3380  case CCValAssign::AExtUpper:
3381  case CCValAssign::SExtUpper:
3382  case CCValAssign::ZExtUpper:
3383  case CCValAssign::FPExt:
3384  case CCValAssign::Trunc:
3385  llvm_unreachable("Unexpected loc info!");
3386  case CCValAssign::Indirect:
3387  // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
3388  // support this.
3389  return false;
3390  }
3391 
3392  if (VA.isRegLoc()) {
3393  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3394  TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3395  OutRegs.push_back(VA.getLocReg());
3396  } else {
3397  assert(VA.isMemLoc() && "Unknown value location!");
3398 
3399  // Don't emit stores for undef values.
3400  if (isa<UndefValue>(ArgVal))
3401  continue;
3402 
3403  unsigned LocMemOffset = VA.getLocMemOffset();
3404  X86AddressMode AM;
3405  AM.Base.Reg = RegInfo->getStackRegister();
3406  AM.Disp = LocMemOffset;
3407  ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
3408  Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3409  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3410  MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
3411  MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3412  if (Flags.isByVal()) {
3413  X86AddressMode SrcAM;
3414  SrcAM.Base.Reg = ArgReg;
3415  if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
3416  return false;
3417  } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
3418  // If this is a really simple value, emit this with the Value* version
3419  // of X86FastEmitStore. If it isn't simple, we don't want to do this,
3420  // as it can cause us to reevaluate the argument.
3421  if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
3422  return false;
3423  } else {
3424  if (!X86FastEmitStore(ArgVT, ArgReg, AM, MMO))
3425  return false;
3426  }
3427  }
3428  }
3429 
3430  // ELF / PIC requires the GOT pointer to be in the EBX register before
3431  // making function calls via the PLT.
3432  if (Subtarget->isPICStyleGOT()) {
3433  unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3434  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3435  TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
3436  }
3437 
3438  if (Is64Bit && IsVarArg && !IsWin64) {
3439  // From AMD64 ABI document:
3440  // For calls that may call functions that use varargs or stdargs
3441  // (prototype-less calls or calls to functions containing ellipsis (...) in
3442  // the declaration) %al is used as hidden argument to specify the number
3443  // of SSE registers used. The contents of %al do not need to match exactly
3444  // the number of registers, but must be an upper bound on the number of SSE
3445  // registers used and is in the range 0 - 8 inclusive.
3446 
3447  // Count the number of XMM registers allocated.
3448  static const MCPhysReg XMMArgRegs[] = {
3449  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3450  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3451  };
3452  unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3453  assert((Subtarget->hasSSE1() || !NumXMMRegs)
3454  && "SSE registers cannot be used when SSE is disabled");
3455  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
3456  X86::AL).addImm(NumXMMRegs);
3457  }
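// Illustrative example: for a varargs call such as printf("%f\n", x) with one
// double passed in XMM0, getFirstUnallocated returns 1 and the MOV8ri above
// emits `movb $1, %al` before the call, as the AMD64 ABI requires.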
3458 
3459  // Materialize callee address in a register. FIXME: GV address can be
3460  // handled with a CALLpcrel32 instead.
3461  X86AddressMode CalleeAM;
3462  if (!X86SelectCallAddress(Callee, CalleeAM))
3463  return false;
3464 
3465  unsigned CalleeOp = 0;
3466  const GlobalValue *GV = nullptr;
3467  if (CalleeAM.GV != nullptr) {
3468  GV = CalleeAM.GV;
3469  } else if (CalleeAM.Base.Reg != 0) {
3470  CalleeOp = CalleeAM.Base.Reg;
3471  } else
3472  return false;
3473 
3474  // Issue the call.
3475  MachineInstrBuilder MIB;
3476  if (CalleeOp) {
3477  // Register-indirect call.
3478  unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
3479  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
3480  .addReg(CalleeOp);
3481  } else {
3482  // Direct call.
3483  assert(GV && "Not a direct call");
3484  // See if we need any target-specific flags on the GV operand.
3485  unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
3486 
3487  // This will be a direct call, or an indirect call through memory for
3488  // NonLazyBind calls or dllimport calls.
3489  bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT ||
3490  OpFlags == X86II::MO_GOTPCREL ||
3491  OpFlags == X86II::MO_COFFSTUB;
3492  unsigned CallOpc = NeedLoad
3493  ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
3494  : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
3495 
3496  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
3497  if (NeedLoad)
3498  MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
3499  if (Symbol)
3500  MIB.addSym(Symbol, OpFlags);
3501  else
3502  MIB.addGlobalAddress(GV, 0, OpFlags);
3503  if (NeedLoad)
3504  MIB.addReg(0);
3505  }
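// Illustrative examples of the two call forms emitted above: a plain external
// symbol becomes a pcrel32 call such as `callq foo`, while a dllimport or
// GOTPCREL reference is called through memory, e.g.
// `callq *foo@GOTPCREL(%rip)` on 64-bit ELF/PIC targets.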
3506 
3507  // Add a register mask operand representing the call-preserved registers.
3508  // Proper defs for return values will be added by setPhysRegsDeadExcept().
3509  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3510 
3511  // Add an implicit use GOT pointer in EBX.
3512  if (Subtarget->isPICStyleGOT())
3513  MIB.addReg(X86::EBX, RegState::Implicit);
3514 
3515  if (Is64Bit && IsVarArg && !IsWin64)
3516  MIB.addReg(X86::AL, RegState::Implicit);
3517 
3518  // Add implicit physical register uses to the call.
3519  for (auto Reg : OutRegs)
3520  MIB.addReg(Reg, RegState::Implicit);
3521 
3522  // Issue CALLSEQ_END
3523  unsigned NumBytesForCalleeToPop =
3524  X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
3525  TM.Options.GuaranteedTailCallOpt)
3526  ? NumBytes // Callee pops everything.
3527  : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CB);
3528  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3529  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3530  .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
3531 
3532  // Now handle call return values.
3533  SmallVector<CCValAssign, 16> RVLocs;
3534  CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
3535  CLI.RetTy->getContext());
3536  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
3537 
3538  // Copy all of the result registers out of their specified physreg.
3539  Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3540  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3541  CCValAssign &VA = RVLocs[i];
3542  EVT CopyVT = VA.getValVT();
3543  unsigned CopyReg = ResultReg + i;
3544  Register SrcReg = VA.getLocReg();
3545 
3546  // If this is x86-64, and we disabled SSE, we can't return FP values
3547  if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
3548  ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
3549  report_fatal_error("SSE register return with SSE disabled");
3550  }
3551 
3552  // If we prefer to use the value in xmm registers, copy it out as f80 and
3553  // use a truncate to move it from fp stack reg to xmm reg.
3554  if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
3555  isScalarFPTypeInSSEReg(VA.getValVT())) {
3556  CopyVT = MVT::f80;
3557  CopyReg = createResultReg(&X86::RFP80RegClass);
3558  }
3559 
3560  // Copy out the result.
3561  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3562  TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
3563  InRegs.push_back(VA.getLocReg());
3564 
3565  // Round the f80 to the right size, which also moves it to the appropriate
3566  // xmm register. This is accomplished by storing the f80 value in memory
3567  // and then loading it back.
3568  if (CopyVT != VA.getValVT()) {
3569  EVT ResVT = VA.getValVT();
3570  unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3571  unsigned MemSize = ResVT.getSizeInBits()/8;
3572  int FI = MFI.CreateStackObject(MemSize, Align(MemSize), false);
3573  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3574  TII.get(Opc)), FI)
3575  .addReg(CopyReg);
3576  Opc = ResVT == MVT::f32 ? X86::MOVSSrm_alt : X86::MOVSDrm_alt;
3577  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3578  TII.get(Opc), ResultReg + i), FI);
3579  }
3580  }
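// Illustrative sketch of the f80 round-trip above: a float returned in the x87
// register ST(0) is copied out as f80, spilled with ST_Fp80m32 and reloaded
// with MOVSSrm_alt, roughly `fstps 8(%rsp); movss 8(%rsp), %xmm0`, so the
// value ends up in an XMM register at the caller's preferred width.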
3581 
3582  CLI.ResultReg = ResultReg;
3583  CLI.NumResultRegs = RVLocs.size();
3584  CLI.Call = MIB;
3585 
3586  return true;
3587 }
3588 
3589 bool
3590 X86FastISel::fastSelectInstruction(const Instruction *I) {
3591  switch (I->getOpcode()) {
3592  default: break;
3593  case Instruction::Load:
3594  return X86SelectLoad(I);
3595  case Instruction::Store:
3596  return X86SelectStore(I);
3597  case Instruction::Ret:
3598  return X86SelectRet(I);
3599  case Instruction::ICmp:
3600  case Instruction::FCmp:
3601  return X86SelectCmp(I);
3602  case Instruction::ZExt:
3603  return X86SelectZExt(I);
3604  case Instruction::SExt:
3605  return X86SelectSExt(I);
3606  case Instruction::Br:
3607  return X86SelectBranch(I);
3608  case Instruction::LShr:
3609  case Instruction::AShr:
3610  case Instruction::Shl:
3611  return X86SelectShift(I);
3612  case Instruction::SDiv:
3613  case Instruction::UDiv:
3614  case Instruction::SRem:
3615  case Instruction::URem:
3616  return X86SelectDivRem(I);
3617  case Instruction::Select:
3618  return X86SelectSelect(I);
3619  case Instruction::Trunc:
3620  return X86SelectTrunc(I);
3621  case Instruction::FPExt:
3622  return X86SelectFPExt(I);
3623  case Instruction::FPTrunc:
3624  return X86SelectFPTrunc(I);
3625  case Instruction::SIToFP:
3626  return X86SelectSIToFP(I);
3627  case Instruction::UIToFP:
3628  return X86SelectUIToFP(I);
3629  case Instruction::IntToPtr: // Deliberate fall-through.
3630  case Instruction::PtrToInt: {
3631  EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3632  EVT DstVT = TLI.getValueType(DL, I->getType());
3633  if (DstVT.bitsGT(SrcVT))
3634  return X86SelectZExt(I);
3635  if (DstVT.bitsLT(SrcVT))
3636  return X86SelectTrunc(I);
3637  Register Reg = getRegForValue(I->getOperand(0));
3638  if (Reg == 0) return false;
3639  updateValueMap(I, Reg);
3640  return true;
3641  }
3642  case Instruction::BitCast: {
3643  // Select SSE2/AVX bitcasts between 128/256/512 bit vector types.
3644  if (!Subtarget->hasSSE2())
3645  return false;
3646 
3647  MVT SrcVT, DstVT;
3648  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT) ||
3649  !isTypeLegal(I->getType(), DstVT))
3650  return false;
3651 
3652  // Only allow vectors that use xmm/ymm/zmm.
3653  if (!SrcVT.isVector() || !DstVT.isVector() ||
3654  SrcVT.getVectorElementType() == MVT::i1 ||
3655  DstVT.getVectorElementType() == MVT::i1)
3656  return false;
3657 
3658  Register Reg = getRegForValue(I->getOperand(0));
3659  if (!Reg)
3660  return false;
3661 
3662  // Emit a reg-reg copy so we don't propagate cached known bits information
3663  // with the wrong VT if we fall out of fast isel after selecting this.
3664  const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
3665  Register ResultReg = createResultReg(DstClass);
3666  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3667  TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg);
3668 
3669  updateValueMap(I, ResultReg);
3670  return true;
3671  }
3672  }
3673 
3674  return false;
3675 }
3676 
3677 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
3678  if (VT > MVT::i64)
3679  return 0;
3680 
3681  uint64_t Imm = CI->getZExtValue();
3682  if (Imm == 0) {
3683  Register SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
3684  switch (VT.SimpleTy) {
3685  default: llvm_unreachable("Unexpected value type");
3686  case MVT::i1:
3687  case MVT::i8:
3688  return fastEmitInst_extractsubreg(MVT::i8, SrcReg, X86::sub_8bit);
3689  case MVT::i16:
3690  return fastEmitInst_extractsubreg(MVT::i16, SrcReg, X86::sub_16bit);
3691  case MVT::i32:
3692  return SrcReg;
3693  case MVT::i64: {
3694  Register ResultReg = createResultReg(&X86::GR64RegClass);
3695  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3696  TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3697  .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3698  return ResultReg;
3699  }
3700  }
3701  }
3702 
3703  unsigned Opc = 0;
3704  switch (VT.SimpleTy) {
3705  default: llvm_unreachable("Unexpected value type");
3706  case MVT::i1:
3707  VT = MVT::i8;
3708  LLVM_FALLTHROUGH;
3709  case MVT::i8: Opc = X86::MOV8ri; break;
3710  case MVT::i16: Opc = X86::MOV16ri; break;
3711  case MVT::i32: Opc = X86::MOV32ri; break;
3712  case MVT::i64: {
3713  if (isUInt<32>(Imm))
3714  Opc = X86::MOV32ri64;
3715  else if (isInt<32>(Imm))
3716  Opc = X86::MOV64ri32;
3717  else
3718  Opc = X86::MOV64ri;
3719  break;
3720  }
3721  }
3722  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
3723 }
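// Illustrative examples for X86MaterializeInt: the constant i64 0 is built as
// `xorl %eax, %eax` (MOV32r0) widened with SUBREG_TO_REG, a value that fits in
// an unsigned 32-bit immediate uses MOV32ri64, and only a full 64-bit constant
// falls back to the 10-byte MOV64ri encoding.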
3724 
3725 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
3726  if (CFP->isNullValue())
3727  return fastMaterializeFloatZero(CFP);
3728 
3729  // Can't handle alternate code models yet.
3730  CodeModel::Model CM = TM.getCodeModel();
3731  if (CM != CodeModel::Small && CM != CodeModel::Large)
3732  return 0;
3733 
3734  // Get opcode and regclass of the output for the given load instruction.
3735  unsigned Opc = 0;
3736  bool HasAVX = Subtarget->hasAVX();
3737  bool HasAVX512 = Subtarget->hasAVX512();
3738  switch (VT.SimpleTy) {
3739  default: return 0;
3740  case MVT::f32:
3741  if (X86ScalarSSEf32)
3742  Opc = HasAVX512 ? X86::VMOVSSZrm_alt :
3743  HasAVX ? X86::VMOVSSrm_alt :
3744  X86::MOVSSrm_alt;
3745  else
3746  Opc = X86::LD_Fp32m;
3747  break;
3748  case MVT::f64:
3749  if (X86ScalarSSEf64)
3750  Opc = HasAVX512 ? X86::VMOVSDZrm_alt :
3751  HasAVX ? X86::VMOVSDrm_alt :
3752  X86::MOVSDrm_alt;
3753  else
3754  Opc = X86::LD_Fp64m;
3755  break;
3756  case MVT::f80:
3757  // No f80 support yet.
3758  return 0;
3759  }
3760 
3761  // MachineConstantPool wants an explicit alignment.
3762  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
3763 
3764  // x86-32 PIC requires a PIC base register for constant pools.
3765  unsigned PICBase = 0;
3766  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
3767  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
3768  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3769  else if (OpFlag == X86II::MO_GOTOFF)
3770  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3771  else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
3772  PICBase = X86::RIP;
3773 
3774  // Create the load from the constant pool.
3775  unsigned CPI = MCP.getConstantPoolIndex(CFP, Alignment);
3776  Register ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
3777 
3778  // Large code model only applies to 64-bit mode.
3779  if (Subtarget->is64Bit() && CM == CodeModel::Large) {
3780  Register AddrReg = createResultReg(&X86::GR64RegClass);
3781  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3782  AddrReg)
3783  .addConstantPoolIndex(CPI, 0, OpFlag);
3784  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3785  TII.get(Opc), ResultReg);
3786  addRegReg(MIB, AddrReg, false, PICBase, false);
3787  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3788  MachinePointerInfo::getConstantPool(*FuncInfo.MF),
3789  MachineMemOperand::MOLoad, DL.getPointerSize(), Alignment);
3790  MIB->addMemOperand(*FuncInfo.MF, MMO);
3791  return ResultReg;
3792  }
3793 
3794  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3795  TII.get(Opc), ResultReg),
3796  CPI, PICBase, OpFlag);
3797  return ResultReg;
3798 }
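// Illustrative example: in the common small-code-model x86-64 case the
// constant ends up in the constant pool and PICBase is RIP, so the load above
// is emitted as something like `movsd .LCPI0_0(%rip), %xmm0`.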
3799 
3800 unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
3801  // Can't handle alternate code models yet.
3802  if (TM.getCodeModel() != CodeModel::Small)
3803  return 0;
3804 
3805  // Materialize addresses with LEA/MOV instructions.
3806  X86AddressMode AM;
3807  if (X86SelectAddress(GV, AM)) {
3808  // If the expression is just a basereg, then we're done, otherwise we need
3809  // to emit an LEA.
3810  if (AM.BaseType == X86AddressMode::RegBase &&
3811  AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
3812  return AM.Base.Reg;
3813 
3814  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3815  if (TM.getRelocationModel() == Reloc::Static &&
3816  TLI.getPointerTy(DL) == MVT::i64) {
3817  // The displacement code could be more than 32 bits away so we need to use
3818  // an instruction with a 64-bit immediate.
3819  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
3820  ResultReg)
3821  .addGlobalAddress(GV);
3822  } else {
3823  unsigned Opc =
3824  TLI.getPointerTy(DL) == MVT::i32
3825  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3826  : X86::LEA64r;
3827  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3828  TII.get(Opc), ResultReg), AM);
3829  }
3830  return ResultReg;
3831  }
3832  return 0;
3833 }
3834 
3835 unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
3836  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
3837 
3838  // Only handle simple types.
3839  if (!CEVT.isSimple())
3840  return 0;
3841  MVT VT = CEVT.getSimpleVT();
3842 
3843  if (const auto *CI = dyn_cast<ConstantInt>(C))
3844  return X86MaterializeInt(CI, VT);
3845  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
3846  return X86MaterializeFP(CFP, VT);
3847  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
3848  return X86MaterializeGV(GV, VT);
3849  else if (isa<UndefValue>(C)) {
3850  unsigned Opc = 0;
3851  switch (VT.SimpleTy) {
3852  default:
3853  break;
3854  case MVT::f32:
3855  if (!X86ScalarSSEf32)
3856  Opc = X86::LD_Fp032;
3857  break;
3858  case MVT::f64:
3859  if (!X86ScalarSSEf64)
3860  Opc = X86::LD_Fp064;
3861  break;
3862  case MVT::f80:
3863  Opc = X86::LD_Fp080;
3864  break;
3865  }
3866 
3867  if (Opc) {
3868  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3869  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
3870  ResultReg);
3871  return ResultReg;
3872  }
3873  }
3874 
3875  return 0;
3876 }
3877 
3878 unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
3879  // Fail on dynamic allocas. At this point, getRegForValue has already
3880  // checked its CSE maps, so if we're here trying to handle a dynamic
3881  // alloca, we're not going to succeed. X86SelectAddress has a
3882  // check for dynamic allocas, because it's called directly from
3883  // various places, but targetMaterializeAlloca also needs a check
3884  // in order to avoid recursion between getRegForValue,
3885  // X86SelectAddress, and targetMaterializeAlloca.
3886  if (!FuncInfo.StaticAllocaMap.count(C))
3887  return 0;
3888  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
3889 
3890  X86AddressMode AM;
3891  if (!X86SelectAddress(C, AM))
3892  return 0;
3893  unsigned Opc =
3894  TLI.getPointerTy(DL) == MVT::i32
3895  ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3896  : X86::LEA64r;
3897  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
3898  Register ResultReg = createResultReg(RC);
3899  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3900  TII.get(Opc), ResultReg), AM);
3901  return ResultReg;
3902 }
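// Illustrative example: for a static `alloca i64` the LEA above takes the
// frame-index operand in AM, and after frame lowering it becomes an address
// computation such as `leaq -8(%rbp), %rax`.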
3903 
3904 unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
3905  MVT VT;
3906  if (!isTypeLegal(CF->getType(), VT))
3907  return 0;
3908 
3909  // Get opcode and regclass for the given zero.
3910  bool HasAVX512 = Subtarget->hasAVX512();
3911  unsigned Opc = 0;
3912  switch (VT.SimpleTy) {
3913  default: return 0;
3914  case MVT::f32:
3915  if (X86ScalarSSEf32)
3916  Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS;
3917  else
3918  Opc = X86::LD_Fp032;
3919  break;
3920  case MVT::f64:
3921  if (X86ScalarSSEf64)
3922  Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD;
3923  else
3924  Opc = X86::LD_Fp064;
3925  break;
3926  case MVT::f80:
3927  // No f80 support yet.
3928  return 0;
3929  }
3930 
3931  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3932  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
3933  return ResultReg;
3934 }
3935 
3936 
3937 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
3938  const LoadInst *LI) {
3939  const Value *Ptr = LI->getPointerOperand();
3940  X86AddressMode AM;
3941  if (!X86SelectAddress(Ptr, AM))
3942  return false;
3943 
3944  const X86InstrInfo &XII = (const X86InstrInfo &)TII;
3945 
3946  unsigned Size = DL.getTypeAllocSize(LI->getType());
3947 
3948  SmallVector<MachineOperand, 8> AddrOps;
3949  AM.getFullAddress(AddrOps);
3950 
3951  MachineInstr *Result = XII.foldMemoryOperandImpl(
3952  *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, LI->getAlign(),
3953  /*AllowCommute=*/true);
3954  if (!Result)
3955  return false;
3956 
3957  // The index register could be in the wrong register class. Unfortunately,
3958  // foldMemoryOperandImpl could have commuted the instruction so it's not enough
3959  // to just look at OpNo + the offset to the index reg. We actually need to
3960  // scan the instruction to find the index reg and see if it's the correct reg
3961  // class.
3962  unsigned OperandNo = 0;
3963  for (MachineInstr::mop_iterator I = Result->operands_begin(),
3964  E = Result->operands_end(); I != E; ++I, ++OperandNo) {
3965  MachineOperand &MO = *I;
3966  if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
3967  continue;
3968  // Found the index reg, now try to rewrite it.
3969  Register IndexReg = constrainOperandRegClass(Result->getDesc(),
3970  MO.getReg(), OperandNo);
3971  if (IndexReg == MO.getReg())
3972  continue;
3973  MO.setReg(IndexReg);
3974  }
3975 
3976  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
3977  Result->cloneInstrSymbols(*FuncInfo.MF, *MI);
3978  MachineBasicBlock::iterator I(MI);
3979  removeDeadCode(I, std::next(I));
3980  return true;
3981 }
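// Illustrative example of the folding above: if the vreg defined by the load
// feeds an ADD32rr, foldMemoryOperandImpl can rewrite it to ADD32rm, turning
// `movl (%rdi), %ecx; addl %ecx, %eax` into `addl (%rdi), %eax`; the original
// instruction is then deleted via removeDeadCode.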
3982 
3983 unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
3984  const TargetRegisterClass *RC,
3985  unsigned Op0, unsigned Op1,
3986  unsigned Op2, unsigned Op3) {
3987  const MCInstrDesc &II = TII.get(MachineInstOpcode);
3988 
3989  Register ResultReg = createResultReg(RC);
3990  Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
3991  Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
3992  Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
3993  Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
3994 
3995  if (II.getNumDefs() >= 1)
3996  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
3997  .addReg(Op0)
3998  .addReg(Op1)
3999  .addReg(Op2)
4000  .addReg(Op3);
4001  else {
4002  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
4003  .addReg(Op0)
4004  .addReg(Op1)
4005  .addReg(Op2)
4006  .addReg(Op3);
4007  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4008  TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
4009  }
4010  return ResultReg;
4011 }
4012 
4013 
4014 namespace llvm {
4015 FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
4016  const TargetLibraryInfo *libInfo) {
4017  return new X86FastISel(funcInfo, libInfo);
4018  }
4019 }
llvm::CallingConv::C
@ C
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::addRegReg
static const MachineInstrBuilder & addRegReg(const MachineInstrBuilder &MIB, unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2)
addRegReg - This function is used to add a memory reference of the form: [Reg + Reg].
Definition: X86InstrBuilder.h:164
llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition: CallingConvLower.h:153
i
i
Definition: README.txt:29
llvm::MCInstrDesc::getNumDefs
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:243
llvm::CmpInst::FCMP_ULE
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:735
llvm::CCValAssign::ZExt
@ ZExt
Definition: CallingConvLower.h:38
llvm::DbgVariableIntrinsic::getExpression
DIExpression * getExpression() const
Definition: IntrinsicInst.h:257
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
llvm::MVT::getVectorElementType
MVT getVectorElementType() const
Definition: MachineValueType.h:519
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4636
llvm::MVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: MachineValueType.h:1072
llvm::CallingConv::SwiftTail
@ SwiftTail
SwiftTail - This follows the Swift calling convention in how arguments are passed but guarantees tail...
Definition: CallingConv.h:92
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:103
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::DbgDeclareInst::getAddress
Value * getAddress() const
Definition: IntrinsicInst.h:310
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
llvm::AArch64CC::AL
@ AL
Definition: AArch64BaseInfo.h:269
llvm::N86::ECX
@ ECX
Definition: X86MCTargetDesc.h:51
llvm::ReturnInst
Return a value (possibly void), from a function.
Definition: Instructions.h:2986
llvm::CCValAssign::Full
@ Full
Definition: CallingConvLower.h:36
llvm::MVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: MachineValueType.h:350
llvm::X86Subtarget::hasSSE2
bool hasSSE2() const
Definition: X86Subtarget.h:638
llvm::CCValAssign::SExtUpper
@ SExtUpper
Definition: CallingConvLower.h:40
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:435
llvm::generic_gep_type_iterator
Definition: GetElementPtrTypeIterator.h:31
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:848
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::X86AddressMode
X86AddressMode - This struct holds a generalized full x86 address mode.
Definition: X86InstrBuilder.h:42
IntrinsicInst.h
X86Subtarget.h
llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition: CallingConvLower.h:191
llvm::ConstantExpr::getZExt
static Constant * getZExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2098
llvm::Function
Definition: Function.h:61
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:52
X86InstrBuilder.h
llvm::MachinePointerInfo::getConstantPool
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
Definition: MachineOperand.cpp:997
X86SelectAddress
static void X86SelectAddress(const MachineInstr &I, const MachineRegisterInfo &MRI, X86AddressMode &AM)
Definition: X86InstructionSelector.cpp:474
GetElementPtrTypeIterator.h
llvm::ConstantExpr::getSExt
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2084
llvm::MemIntrinsicBase::getDestAddressSpace
unsigned getDestAddressSpace() const
Definition: IntrinsicInst.h:654
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::X86AddressMode::GV
const GlobalValue * GV
Definition: X86InstrBuilder.h:56
llvm::MVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: MachineValueType.h:366
llvm::CmpInst::FCMP_ONE
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:728
llvm::X86Subtarget
Definition: X86Subtarget.h:52
ErrorHandling.h
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::X86::COND_P
@ COND_P
Definition: X86BaseInfo.h:91
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:742
llvm::CmpInst::getInversePredicate
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:820
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:732
llvm::N86::ESI
@ ESI
Definition: X86MCTargetDesc.h:51
llvm::CCValAssign::VExt
@ VExt
Definition: CallingConvLower.h:48
llvm::CallingConv::X86_StdCall
@ X86_StdCall
X86_StdCall - stdcall is the calling conventions mostly used by the Win32 API.
Definition: CallingConv.h:102
llvm::CCValAssign::Indirect
@ Indirect
Definition: CallingConvLower.h:52
llvm::X86Subtarget::isTargetMCU
bool isTargetMCU() const
Definition: X86Subtarget.h:859
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:233
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::MachineInstr::getDesc
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:486
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:333
llvm::RegState::Implicit
@ Implicit
Not emitted register (e.g. carry, or temporary result).
Definition: MachineInstrBuilder.h:46
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:128
llvm::N86::EDI
@ EDI
Definition: X86MCTargetDesc.h:51
llvm::DbgVariableIntrinsic::getVariable
DILocalVariable * getVariable() const
Definition: IntrinsicInst.h:253
llvm::CCValAssign::BCvt
@ BCvt
Definition: CallingConvLower.h:46
llvm::CCValAssign::AExtUpper
@ AExtUpper
Definition: CallingConvLower.h:44
Operator.h
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::CallBase::getNumArgOperands
unsigned getNumArgOperands() const
Definition: InstrTypes.h:1336
llvm::CCValAssign::ZExtUpper
@ ZExtUpper
Definition: CallingConvLower.h:42
llvm::X86::CondCode
CondCode
Definition: X86BaseInfo.h:80
llvm::X86ISD::SMUL
@ SMUL
Definition: X86ISelLowering.h:402
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:267
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:139
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
llvm::MVT::v2f64
@ v2f64
Definition: MachineValueType.h:172
llvm::CCValAssign::Trunc
@ Trunc
Definition: CallingConvLower.h:47
llvm::X86II::MO_GOTOFF
@ MO_GOTOFF
MO_GOTOFF - On a symbol operand this indicates that the immediate is the offset to the location of th...
Definition: X86BaseInfo.h:434
llvm::LoadInst::getAlign
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:223
llvm::CmpInst::FCMP_OGT
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:724
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:203
llvm::MachineInstr::addMemOperand
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
Definition: MachineInstr.cpp:382
llvm::X86::COND_O
@ COND_O
Definition: X86BaseInfo.h:81
F
#define F(x, y, z)
Definition: MD5.cpp:56
MachineRegisterInfo.h
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:130
llvm::X86AddressMode::GVOpFlags
unsigned GVOpFlags
Definition: X86InstrBuilder.h:57
llvm::CmpInst::FCMP_ULT
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:734
llvm::CallingConv::Win64
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
Definition: CallingConv.h:169
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Type::isArrayTy
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:225
X86MachineFunctionInfo.h
llvm::TargetRegisterClass::contains
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
Definition: TargetRegisterInfo.h:93
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::CCValAssign::AExt
@ AExt
Definition: CallingConvLower.h:39
llvm::MemTransferBase::getSourceAddressSpace
unsigned getSourceAddressSpace() const
Definition: IntrinsicInst.h:719
llvm::X86II::MO_COFFSTUB
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition: X86BaseInfo.h:570
X86.h
llvm::MVT::v8f64
@ v8f64
Definition: MachineValueType.h:175
llvm::addConstantPoolReference
static const MachineInstrBuilder & addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI, unsigned GlobalBaseReg, unsigned char OpFlags)
addConstantPoolReference - This function is used to add a reference to the base of a constant value s...
Definition: X86InstrBuilder.h:223
llvm::addFullAddress
static const MachineInstrBuilder & addFullAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM)
Definition: X86InstrBuilder.h:172
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:33
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:729
llvm::ISD::ArgFlagsTy::isByVal
bool isByVal() const
Definition: TargetCallingConv.h:85
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:74
FunctionLoweringInfo.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition: CallingConvLower.h:150
llvm::User
Definition: User.h:44
llvm::addDirectMem
static const MachineInstrBuilder & addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg)
addDirectMem - This function is used to add a direct memory reference to the current instruction – th...
Definition: X86InstrBuilder.h:124
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:56
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1393
llvm::X86II::MO_GOTPCREL
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:442
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::X86AddressMode::Scale
unsigned Scale
Definition: X86InstrBuilder.h:53
llvm::MVT::x86mmx
@ x86mmx
Definition: MachineValueType.h:260
MCSymbol.h
llvm::MemTransferBase::getRawSource
Value * getRawSource() const
Return the arguments to the instruction.
Definition: IntrinsicInst.h:706
llvm::CmpInst::FCMP_UGE
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:733
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:321
llvm::RetCC_X86
bool RetCC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:237
llvm::addFrameReference
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
Definition: PPCInstrBuilder.h:32
llvm::Mips::GPRIdx
@ GPRIdx
Definition: MipsRegisterBankInfo.cpp:44
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::X86Subtarget::hasSSE1
bool hasSSE1() const
Definition: X86Subtarget.h:637
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:195
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:28
llvm::ISD::NodeType
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
llvm::MVT::v4f64
@ v4f64
Definition: MachineValueType.h:174
llvm::CmpInst::FCMP_UNO
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:730
llvm::Instruction
Definition: Instruction.h:45
llvm::MCID::Flag
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:146
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:153
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::DILocalVariable::isValidLocationForIntrinsic
bool isValidLocationForIntrinsic(const DILocation *DL) const
Check that a location is valid for this variable.
Definition: DebugInfoMetadata.h:3174
llvm::CmpInst::FCMP_OEQ
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:723
llvm::N86::EBX
@ EBX
Definition: X86MCTargetDesc.h:51
llvm::CmpInst::FCMP_OLT
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:726
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:632
llvm::CCValAssign::getLocInfo
LocInfo getLocInfo() const
Definition: CallingConvLower.h:155
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::X86AddressMode::Reg
unsigned Reg
Definition: X86InstrBuilder.h:49
llvm::CCValAssign::getLocMemOffset
unsigned getLocMemOffset() const
Definition: CallingConvLower.h:151
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::EVT::bitsLT
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:273
llvm::CCValAssign::isRegLoc
bool isRegLoc() const
Definition: CallingConvLower.h:145
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::Triple::isOSMSVCRT
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:584
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::MemSetInst
This class wraps the llvm.memset intrinsic.
Definition: IntrinsicInst.h:905
llvm::BranchInst::getCondition
Value * getCondition() const
Definition: Instructions.h:3149
llvm::CmpInst::FCMP_FALSE
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:722
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::CodeModel::Model
Model
Definition: CodeGen.h:28
llvm::CmpInst
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:710
llvm::MVT::f80
@ f80
Definition: MachineValueType.h:57
X86ChooseCmpOpcode
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget)
Definition: X86FastISel.cpp:1361
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:201
llvm::isInt< 8 >
constexpr bool isInt< 8 >(int64_t x)
Definition: MathExtras.h:367
llvm::MVT::v4i64
@ v4i64
Definition: MachineValueType.h:120
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:626
llvm::MVT::v32i16
@ v32i16
Definition: MachineValueType.h:94
llvm::X86AddressMode::IndexReg
unsigned IndexReg
Definition: X86InstrBuilder.h:54
llvm::CallingConv::X86_ThisCall
@ X86_ThisCall
X86_ThisCall - Similar to X86_StdCall.
Definition: CallingConv.h:126
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:80
BranchProbabilityInfo.h
llvm::isInt< 32 >
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:373
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:341
llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:181
llvm::CCValAssign::SExt
@ SExt
Definition: CallingConvLower.h:37
llvm::isUInt< 16 >
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:408
llvm::MVT::v16i16
@ v16i16
Definition: MachineValueType.h:93
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:321
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::MVT::v2i64
@ v2i64
Definition: MachineValueType.h:118
llvm::isGlobalStubReference
static bool isGlobalStubReference(unsigned char TargetFlag)
isGlobalStubReference - Return true if the specified TargetFlag operand is a reference to a stub for ...
Definition: X86InstrInfo.h:75
llvm::StructLayout
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:608
uint64_t
llvm::DbgDeclareInst
This represents the llvm.dbg.declare instruction.
Definition: IntrinsicInst.h:308
llvm::CallingConv::HiPE
@ HiPE
Definition: CallingConv.h:55
llvm::X86II::MO_PIC_BASE_OFFSET
@ MO_PIC_BASE_OFFSET
MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the immediate should get the value of th...
Definition: X86BaseInfo.h:420
llvm::CallingConv::CFGuard_Check
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall funtion.
Definition: CallingConv.h:87
llvm::MVT::v16f32
@ v16f32
Definition: MachineValueType.h:162
llvm::TruncInst
This class represents a truncation of integer types.
Definition: Instructions.h:4755
AH
AMD64 Optimization Manual has some nice information about optimizing integer multiplication by a constant How much of it applies to Intel s X86 implementation There are definite trade offs to xmm0 cvttss2siq rdx jb L3 subss xmm0 rax cvttss2siq rdx xorq rdx rax ret instead of xmm1 cvttss2siq rcx movaps xmm2 subss xmm2 cvttss2siq rax rdx xorq rax ucomiss xmm0 cmovb rax ret Seems like the jb branch has high likelihood of being taken It would have saved a few instructions It s not possible to reference AH
Definition: README-X86-64.txt:44
llvm::X86AddressMode::Base
union llvm::X86AddressMode::@566 Base
llvm::ISD::TRAP
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1088
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
llvm::codeview::FrameCookieKind::Copy
@ Copy
DebugInfo.h
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:20
llvm::X86RegisterInfo::getPtrSizedFrameRegister
unsigned getPtrSizedFrameRegister(const MachineFunction &MF) const
Definition: X86RegisterInfo.cpp:858
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:928
MachineConstantPool.h
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::CallingConv::X86_64_SysV
@ X86_64_SysV
The C convention as specified in the x86-64 supplement to the System V ABI, used on most non-Windows ...
Definition: CallingConv.h:159
llvm::X86ISD::UMUL
@ UMUL
Definition: X86ISelLowering.h:403
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:157
llvm::isUInt< 32 >
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:411
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:44
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::FunctionLoweringInfo
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Definition: FunctionLoweringInfo.h:53
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:860
llvm::CallingConv::WebKit_JS
@ WebKit_JS
Definition: CallingConv.h:58
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:642
llvm::X86::COND_B
@ COND_B
Definition: X86BaseInfo.h:83
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::CmpInst::FCMP_OGE
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:725
llvm::X86AddressMode::Disp
int Disp
Definition: X86InstrBuilder.h:55
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::X86AddressMode::FrameIndexBase
@ FrameIndexBase
Definition: X86InstrBuilder.h:45
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1355
llvm::X86TargetMachine
Definition: X86TargetMachine.h:28
llvm::X86MachineFunctionInfo
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
Definition: X86MachineFunctionInfo.h:25
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::FastISel
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:65
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::X86InstrInfo::foldMemoryOperandImpl
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
foldMemoryOperand - If this target supports it, fold a load or store of the specified stack slot into...
Definition: X86InstrInfo.cpp:5825
llvm::X86AddressMode::RegBase
@ RegBase
Definition: X86InstrBuilder.h:44
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::X86InstrInfo
Definition: X86InstrInfo.h:130
TargetOptions.h
llvm::CCValAssign::getValNo
unsigned getValNo() const
Definition: CallingConvLower.h:142
llvm::MachineInstrBuilder::addRegMask
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
Definition: MachineInstrBuilder.h:197
llvm::TargetMachine::getMCAsmInfo
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
Definition: TargetMachine.h:207
llvm::X86II::MO_DLLIMPORT
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the reference is actually to the "__imp...
Definition: X86BaseInfo.h:532
llvm::X86::isCalleePop
bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO)
Determines whether the callee is required to pop its own arguments.
Definition: X86ISelLowering.cpp:5177
MCAsmInfo.h
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1558
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:47
llvm::MachineFrameInfo::CreateStackObject
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Definition: MachineFrameInfo.cpp:51
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::isGlobalRelativeToPICBase
static bool isGlobalRelativeToPICBase(unsigned char TargetFlag)
isGlobalRelativeToPICBase - Return true if the specified global value reference is relative to a 32-b...
Definition: X86InstrInfo.h:92
llvm::CallingConv::Tail
@ Tail
Tail - This calling convention attempts to make calls as fast as possible while guaranteeing that tail...
Definition: CallingConv.h:81
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::X86MachineFunctionInfo::getBytesToPopOnReturn
unsigned getBytesToPopOnReturn() const
Definition: X86MachineFunctionInfo.h:149
llvm::Reloc::Static
@ Static
Definition: CodeGen.h:22
uint32_t
llvm::N86::EDX
@ EDX
Definition: X86MCTargetDesc.h:51
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:990
llvm::ISD::ArgFlagsTy
Definition: TargetCallingConv.h:27
DL
DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::MVT::v64i8
@ v64i8
Definition: MachineValueType.h:82
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:375
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn't it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::N86::EAX
@ EAX
Definition: X86MCTargetDesc.h:51
llvm::X86::createFastISel
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
Definition: X86FastISel.cpp:4015
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:148
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:103
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:273
llvm::CCValAssign::FPExt
@ FPExt
Definition: CallingConvLower.h:51
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
llvm::CallBase::paramHasAttr
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
Definition: Instructions.cpp:341
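A small usage sketch for CallBase::paramHasAttr; the sret query on the first argument is an assumed example of the kind of per-argument attribute check a lowering pass might make.
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Attributes.h"

// Returns true if the call passes its first argument as 'sret'.
static bool firstArgIsSRet(const llvm::CallBase &CB) {
  return CB.arg_size() > 0 && CB.paramHasAttr(0, llvm::Attribute::StructRet);
}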
llvm::MVT::v8i64
@ v8i64
Definition: MachineValueType.h:121
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition: MachineMemOperand.h:135
MRI
const MachineRegisterInfo *MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::X86AddressMode::BaseType
enum llvm::X86AddressMode::@565 BaseType
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:108
Callee
FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:206
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:162
CallingConv.h
llvm::Instruction::isAtomic
bool isAtomic() const
Return true if this instruction has an AtomicOrdering of unordered or higher.
Definition: Instruction.cpp:604
llvm::CmpInst::FCMP_UGT
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:732
llvm::X86Subtarget::hasAVX512
bool hasAVX512() const
Definition: X86Subtarget.h:645
llvm::CCValAssign::isMemLoc
bool isMemLoc() const
Definition: CallingConvLower.h:146
llvm::ISD::ArgFlagsTy::getByValSize
unsigned getByValSize() const
Definition: TargetCallingConv.h:169
X86ChooseCmpImmediateOpcode
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC)
If we have a comparison with RHS as the RHS of the comparison, return an opcode that works for the co...
Definition: X86FastISel.cpp:1388
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:936
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:254
X86CallingConv.h
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:348
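A brief, assumed example of Constant::getNullValue producing the zero constant of a given type; i32 is chosen here purely for illustration.
#include "llvm/IR/Constants.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/LLVMContext.h"

// Produces the constant 'i32 0' for the given context.
static llvm::Constant *makeZeroI32(llvm::LLVMContext &Ctx) {
  return llvm::Constant::getNullValue(llvm::Type::getInt32Ty(Ctx));
}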
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:147
llvm::TargetRegisterInfo::getRegSizeInBits
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
Definition: TargetRegisterInfo.h:276
llvm::MemIntrinsicBase::getLength
Value * getLength() const
Definition: IntrinsicInst.h:643
uint16_t
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Definition: MachineFunction.h:622
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:321
X86TargetMachine.h
llvm::StructLayout::getElementOffset
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:638
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::CC_X86
bool CC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
GlobalVariable.h
llvm::MachineInstrBuilder::addConstantPoolIndex
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:158
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:92
llvm::pdb::PDB_LocType::Slot
@ Slot
llvm::ISD::ArgFlagsTy::isSExt
bool isSExt() const
Definition: TargetCallingConv.h:76
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:46
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
llvm::CallBase::arg_empty
bool arg_empty() const
Definition: InstrTypes.h:1325
llvm::X86Subtarget::hasAVX
bool hasAVX() const
Definition: X86Subtarget.h:643
llvm::X86::getCMovOpcode
unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand=false)
Return a cmov opcode for the given register size in bytes, and operand type.
Definition: X86InstrInfo.cpp:2857
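A hedged sketch of querying X86::getCMovOpcode; the 4-byte register-register case is an assumed example, and the X86 target-local header is included the same way this file does.
#include "X86InstrInfo.h"  // X86 target-local header, as included by this file

// Picks the 32-bit register-register CMOV opcode; the actual condition code
// is attached later as an immediate operand on the built instruction.
static unsigned chooseCMov32() {
  return llvm::X86::getCMovOpcode(/*RegBytes=*/4, /*HasMemoryOperand=*/false);
}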
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::MachineFrameInfo::setStackProtectorIndex
void setStackProtectorIndex(int I)
Definition: MachineFrameInfo.h:357
llvm::MVT::v8i32
@ v8i32
Definition: MachineValueType.h:107
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition: MachineMemOperand.h:137
GlobalAlias.h
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
X86RegisterInfo.h
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
llvm::ARMBuildAttrs::Symbol
@ Symbol
Definition: ARMBuildAttributes.h:79
llvm::X86AddressMode::FrameIndex
int FrameIndex
Definition: X86InstrBuilder.h:50
llvm::MachineInstrBuilder::addGlobalAddress
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:177
Predicate
llvm::CallingConv::Fast
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
llvm::CodeModel::Large
@ Large
Definition: CodeGen.h:28
llvm::getKillRegState
unsigned getKillRegState(bool B)
Definition: MachineInstrBuilder.h:508
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:107
FastISel.h
llvm::X86Subtarget::getTargetTriple
const Triple & getTargetTriple() const
Definition: X86Subtarget.h:840
Instructions.h
llvm::MemCpyInst
This class wraps the llvm.memcpy intrinsic.
Definition: IntrinsicInst.h:936
llvm::MachineInstrBuilder::addSym
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
Definition: MachineInstrBuilder.h:267
llvm::FunctionLoweringInfo::MF
MachineFunction * MF
Definition: FunctionLoweringInfo.h:56
llvm::CallingConv::X86_FastCall
@ X86_FastCall
X86_FastCall - 'fast' analog of X86_StdCall.
Definition: CallingConv.h:107
llvm::Type::isStructTy
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:222
llvm::GetReturnInfo
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
Definition: TargetLoweringBase.cpp:1648
llvm::MemIntrinsic::isVolatile
bool isVolatile() const
Definition: IntrinsicInst.h:883
llvm::X86AddressMode::getFullAddress
void getFullAddress(SmallVectorImpl< MachineOperand > &MO)
Definition: X86InstrBuilder.h:65
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:370
llvm::X86::getX86ConditionCode
std::pair< CondCode, bool > getX86ConditionCode(CmpInst::Predicate Predicate)
Return a pair of condition code for the given predicate and whether the instruction operands should b...
Definition: X86InstrInfo.cpp:2814
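A minimal sketch of X86::getX86ConditionCode, assuming an ICMP_SGT predicate purely for illustration; the second element of the returned pair reports whether the compare operands must be swapped.
#include "X86InstrInfo.h"        // X86 target-local header, as included by this file
#include "llvm/IR/InstrTypes.h"
#include <utility>

// Maps a signed-greater-than IR predicate to an X86 condition code.
static llvm::X86::CondCode condCodeForSGT(bool &NeedSwap) {
  std::pair<llvm::X86::CondCode, bool> CC =
      llvm::X86::getX86ConditionCode(llvm::CmpInst::ICMP_SGT);
  NeedSwap = CC.second;
  return CC.first;
}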
llvm::User::op_begin
op_iterator op_begin()
Definition: User.h:234
getX86SSEConditionCode
static std::pair< unsigned, bool > getX86SSEConditionCode(CmpInst::Predicate Predicate)
Definition: X86FastISel.cpp:187
llvm::CCValAssign::getValVT
MVT getValVT() const
Definition: CallingConvLower.h:143
llvm::MVT::f16
@ f16
Definition: MachineValueType.h:54
llvm::CmpInst::FCMP_UNE
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:736
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
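A hedged sketch combining BuildMI with MachineInstrBuilder::addReg; the MOV32rr copy and the EAX/ECX registers are assumed examples, and the opcode/register enums come from the X86 target's generated headers as pulled in by this file.
#include "X86InstrInfo.h"  // X86 target-local header, as included by this file
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

// Appends a 32-bit register copy ("mov %ecx -> %eax") to the end of MBB.
// TII and DL are assumed to come from the function currently being selected.
static void emitCopyEAXFromECX(llvm::MachineBasicBlock &MBB,
                               const llvm::TargetInstrInfo &TII,
                               const llvm::DebugLoc &DL) {
  llvm::BuildMI(&MBB, DL, TII.get(llvm::X86::MOV32rr), llvm::X86::EAX)
      .addReg(llvm::X86::ECX);
}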
computeBytesPoppedByCalleeForSRet
static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget, CallingConv::ID CC, const CallBase *CB)
Definition: X86FastISel.cpp:3145
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1338
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:55
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:492
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:796
llvm::constrainOperandRegClass
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition: Utils.cpp:49
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:48
llvm::MemIntrinsicBase::getRawDest
Value * getRawDest() const
Definition: IntrinsicInst.h:637
llvm::orc::SymbolState::Emitted
@ Emitted
Assigned address, still materializing.
llvm::MachineInstrBuilder::addMetadata
const MachineInstrBuilder & addMetadata(const MDNode *MD) const
Definition: MachineInstrBuilder.h:236
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:157
llvm::CmpInst::FCMP_OLE
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:727
llvm::CallingConv::Swift
@ Swift
Definition: CallingConv.h:73
llvm::X86Subtarget::is64Bit
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:606
llvm::ISD::ArgFlagsTy::isInReg
bool isInReg() const
Definition: TargetCallingConv.h:79
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Definition: InstrTypes.h:1161
DerivedTypes.h
TM
LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::MVT::v32i8
@ v32i8
Definition: MachineValueType.h:81
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:257
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:45
llvm::X86RegisterInfo::getStackRegister
Register getStackRegister() const
Definition: X86RegisterInfo.h:138
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::MachineFrameInfo::setFrameAddressIsTaken
void setFrameAddressIsTaken(bool T)
Definition: MachineFrameInfo.h:369
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:62
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::MachineMemOperand::isNonTemporal
bool isNonTemporal() const
Definition: MachineMemOperand.h:291
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3068
llvm::X86::AddrIndexReg
@ AddrIndexReg
Definition: X86BaseInfo.h:34
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:726
llvm::X86MachineFunctionInfo::getSRetReturnReg
Register getSRetReturnReg() const
Definition: X86MachineFunctionInfo.h:161
llvm::MVT::v8f32
@ v8f32
Definition: MachineValueType.h:161
X86InstrInfo.h
llvm::CmpInst::FCMP_TRUE
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:737
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::X86::COND_E
@ COND_E
Definition: X86BaseInfo.h:85
llvm::MCAsmInfo::usesWindowsCFI
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:778
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:55
llvm::MCInstrDesc::getNumOperands
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:228
llvm::X86::LAST_VALID_COND
@ LAST_VALID_COND
Definition: X86BaseInfo.h:97
llvm::MachinePointerInfo::getStack
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
Definition: MachineOperand.cpp:1016
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::X86::COND_NP
@ COND_NP
Definition: X86BaseInfo.h:92
llvm::TargetRegisterInfo::getCallPreservedMask
virtual const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const
Return a mask of call-preserved registers for the given calling convention on the current function.
Definition: TargetRegisterInfo.h:487
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:583
llvm::CallingConv::GHC
@ GHC
Definition: CallingConv.h:51
llvm::CmpInst::FCMP_ORD
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:729
llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3161
llvm::BasicBlock::const_iterator
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:91
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::CmpInst::FCMP_UEQ
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:731
llvm::X86::COND_NE
@ COND_NE
Definition: X86BaseInfo.h:86
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::MCInstrDesc::ImplicitDefs
const MCPhysReg * ImplicitDefs
Definition: MCInstrDesc.h:205
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:289
llvm::X86RegisterInfo
Definition: X86RegisterInfo.h:24
llvm::User::op_end
op_iterator op_end()
Definition: User.h:236