1//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the X86-specific support for the FastISel class. Much
10// of the target-specific code is generated by tablegen in the file
11// X86GenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "X86.h"
16#include "X86CallingConv.h"
17#include "X86InstrBuilder.h"
18#include "X86InstrInfo.h"
20#include "X86RegisterInfo.h"
21#include "X86Subtarget.h"
22#include "X86TargetMachine.h"
29#include "llvm/IR/CallingConv.h"
30#include "llvm/IR/DebugInfo.h"
33#include "llvm/IR/GlobalAlias.h"
37#include "llvm/IR/IntrinsicsX86.h"
38#include "llvm/IR/Operator.h"
39#include "llvm/MC/MCAsmInfo.h"
40#include "llvm/MC/MCSymbol.h"
43using namespace llvm;
44
45namespace {
46
47class X86FastISel final : public FastISel {
48 /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
49 /// make the right decision when generating code for different targets.
50 const X86Subtarget *Subtarget;
51
52public:
53 explicit X86FastISel(FunctionLoweringInfo &funcInfo,
54 const TargetLibraryInfo *libInfo)
55 : FastISel(funcInfo, libInfo) {
56 Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
57 }
58
59 bool fastSelectInstruction(const Instruction *I) override;
60
61 /// The specified machine instr operand is a vreg, and that
62 /// vreg is being provided by the specified load instruction. Try to fold
63 /// the load as an operand to the instruction, returning true if it was
64 /// possible.
65 bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
66 const LoadInst *LI) override;
67
68 bool fastLowerArguments() override;
69 bool fastLowerCall(CallLoweringInfo &CLI) override;
70 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
71
72#include "X86GenFastISel.inc"
73
74private:
75 bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
76 const DebugLoc &DL);
77
78 bool X86FastEmitLoad(MVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
79 unsigned &ResultReg, unsigned Alignment = 1);
80
81 bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
82 MachineMemOperand *MMO = nullptr, bool Aligned = false);
83 bool X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM,
84 MachineMemOperand *MMO = nullptr, bool Aligned = false);
85
86 bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
87 unsigned &ResultReg);
88
89 bool X86SelectAddress(const Value *V, X86AddressMode &AM);
90 bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
91
92 bool X86SelectLoad(const Instruction *I);
93
94 bool X86SelectStore(const Instruction *I);
95
96 bool X86SelectRet(const Instruction *I);
97
98 bool X86SelectCmp(const Instruction *I);
99
100 bool X86SelectZExt(const Instruction *I);
101
102 bool X86SelectSExt(const Instruction *I);
103
104 bool X86SelectBranch(const Instruction *I);
105
106 bool X86SelectShift(const Instruction *I);
107
108 bool X86SelectDivRem(const Instruction *I);
109
110 bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
111
112 bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
113
114 bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
115
116 bool X86SelectSelect(const Instruction *I);
117
118 bool X86SelectTrunc(const Instruction *I);
119
120 bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
121 const TargetRegisterClass *RC);
122
123 bool X86SelectFPExt(const Instruction *I);
124 bool X86SelectFPTrunc(const Instruction *I);
125 bool X86SelectSIToFP(const Instruction *I);
126 bool X86SelectUIToFP(const Instruction *I);
127 bool X86SelectIntToFP(const Instruction *I, bool IsSigned);
128
129 const X86InstrInfo *getInstrInfo() const {
130 return Subtarget->getInstrInfo();
131 }
132 const X86TargetMachine *getTargetMachine() const {
133 return static_cast<const X86TargetMachine *>(&TM);
134 }
135
136 bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
137
138 unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
139 unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
140 unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
141 unsigned fastMaterializeConstant(const Constant *C) override;
142
143 unsigned fastMaterializeAlloca(const AllocaInst *C) override;
144
145 unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
146
147 /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
148 /// computed in an SSE register, not on the X87 floating point stack.
149 bool isScalarFPTypeInSSEReg(EVT VT) const {
150 return (VT == MVT::f64 && Subtarget->hasSSE2()) ||
151 (VT == MVT::f32 && Subtarget->hasSSE1()) || VT == MVT::f16;
152 }
153
154 bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
155
156 bool IsMemcpySmall(uint64_t Len);
157
158 bool TryEmitSmallMemcpy(X86AddressMode DestAM,
159 X86AddressMode SrcAM, uint64_t Len);
160
161 bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
162 const Value *Cond);
163
165 X86AddressMode &AM);
166
167 unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
168 const TargetRegisterClass *RC, unsigned Op0,
169 unsigned Op1, unsigned Op2, unsigned Op3);
170};
171
172} // end anonymous namespace.
173
174static std::pair<unsigned, bool>
175getX86SSEConditionCode(CmpInst::Predicate Predicate) {
176 unsigned CC;
177 bool NeedSwap = false;
178
179 // SSE Condition code mapping:
180 // 0 - EQ
181 // 1 - LT
182 // 2 - LE
183 // 3 - UNORD
184 // 4 - NEQ
185 // 5 - NLT
186 // 6 - NLE
187 // 7 - ORD
188 switch (Predicate) {
189 default: llvm_unreachable("Unexpected predicate");
190 case CmpInst::FCMP_OEQ: CC = 0; break;
191 case CmpInst::FCMP_OGT: NeedSwap = true; [[fallthrough]];
192 case CmpInst::FCMP_OLT: CC = 1; break;
193 case CmpInst::FCMP_OGE: NeedSwap = true; [[fallthrough]];
194 case CmpInst::FCMP_OLE: CC = 2; break;
195 case CmpInst::FCMP_UNO: CC = 3; break;
196 case CmpInst::FCMP_UNE: CC = 4; break;
197 case CmpInst::FCMP_ULE: NeedSwap = true; [[fallthrough]];
198 case CmpInst::FCMP_UGE: CC = 5; break;
199 case CmpInst::FCMP_ULT: NeedSwap = true; [[fallthrough]];
200 case CmpInst::FCMP_UGT: CC = 6; break;
201 case CmpInst::FCMP_ORD: CC = 7; break;
202 case CmpInst::FCMP_UEQ: CC = 8; break;
203 case CmpInst::FCMP_ONE: CC = 12; break;
204 }
205
206 return std::make_pair(CC, NeedSwap);
207}
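// Illustrative note (sketch): FCMP_OGT has no "greater than" entry in the
// 3-bit SSE predicate set, so NeedSwap is returned and the caller emits a
// swapped compare with CC = 1 (LT). The values 8 (EQ_UQ) and 12 (NEQ_OQ)
// only exist in the wider AVX predicate encoding, so callers are expected to
// reject them on non-AVX targets.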
208
209/// Adds a complex addressing mode to the given machine instr builder.
210/// Note, this will constrain the index register. If it's not possible to
211/// constrain the given index register, then a new one will be created. The
212/// IndexReg field of the addressing mode will be updated to match in this case.
213const MachineInstrBuilder &
214X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
215 X86AddressMode &AM) {
216 // First constrain the index register. It needs to be a GR64_NOSP.
217 AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
218 MIB->getNumOperands() +
219 X86::AddrIndexReg);
220 return ::addFullAddress(MIB, AM);
221}
222
223/// Check if it is possible to fold the condition from the XALU intrinsic
224/// into the user. The condition code will only be updated on success.
225bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
226 const Value *Cond) {
227 if (!isa<ExtractValueInst>(Cond))
228 return false;
229
230 const auto *EV = cast<ExtractValueInst>(Cond);
231 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
232 return false;
233
234 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
235 MVT RetVT;
236 const Function *Callee = II->getCalledFunction();
237 Type *RetTy =
238 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
239 if (!isTypeLegal(RetTy, RetVT))
240 return false;
241
242 if (RetVT != MVT::i32 && RetVT != MVT::i64)
243 return false;
244
245 X86::CondCode TmpCC;
246 switch (II->getIntrinsicID()) {
247 default: return false;
248 case Intrinsic::sadd_with_overflow:
249 case Intrinsic::ssub_with_overflow:
250 case Intrinsic::smul_with_overflow:
251 case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
252 case Intrinsic::uadd_with_overflow:
253 case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
254 }
255
256 // Check if both instructions are in the same basic block.
257 if (II->getParent() != I->getParent())
258 return false;
259
260 // Make sure nothing is in the way
261 BasicBlock::const_iterator Start(I);
262 BasicBlock::const_iterator End(II);
263 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
264 // We only expect extractvalue instructions between the intrinsic and the
265 // instruction to be selected.
266 if (!isa<ExtractValueInst>(Itr))
267 return false;
268
269 // Check that the extractvalue operand comes from the intrinsic.
270 const auto *EVI = cast<ExtractValueInst>(Itr);
271 if (EVI->getAggregateOperand() != II)
272 return false;
273 }
274
275 // Make sure no potentially eflags clobbering phi moves can be inserted in
276 // between.
277 auto HasPhis = [](const BasicBlock *Succ) { return !Succ->phis().empty(); };
278 if (I->isTerminator() && llvm::any_of(successors(I), HasPhis))
279 return false;
280
281 // Make sure there are no potentially eflags clobbering constant
282 // materializations in between.
283 if (llvm::any_of(I->operands(), [](Value *V) { return isa<Constant>(V); }))
284 return false;
285
286 CC = TmpCC;
287 return true;
288}
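// Illustrative IR shape this folding targets (sketch):
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %sum = extractvalue { i32, i1 } %res, 0
//   %ovf = extractvalue { i32, i1 } %res, 1
//   br i1 %ovf, label %overflow, label %cont
// The branch can then test the EFLAGS produced by the ADD directly
// (X86::COND_O) instead of first materializing %ovf into a register.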
289
290bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
291 EVT evt = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
292 if (evt == MVT::Other || !evt.isSimple())
293 // Unhandled type. Halt "fast" selection and bail.
294 return false;
295
296 VT = evt.getSimpleVT();
297 // For now, require SSE/SSE2 for performing floating-point operations,
298 // since x87 requires additional work.
299 if (VT == MVT::f64 && !Subtarget->hasSSE2())
300 return false;
301 if (VT == MVT::f32 && !Subtarget->hasSSE1())
302 return false;
303 // Similarly, no f80 support yet.
304 if (VT == MVT::f80)
305 return false;
306 // We only handle legal types. For example, on x86-32 the instruction
307 // selector contains all of the 64-bit instructions from x86-64,
308 // under the assumption that i64 won't be used if the target doesn't
309 // support it.
310 return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
311}
312
313/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
314/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
315/// Return true and the result register by reference if it is possible.
316bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM,
317 MachineMemOperand *MMO, unsigned &ResultReg,
318 unsigned Alignment) {
319 bool HasSSE1 = Subtarget->hasSSE1();
320 bool HasSSE2 = Subtarget->hasSSE2();
321 bool HasSSE41 = Subtarget->hasSSE41();
322 bool HasAVX = Subtarget->hasAVX();
323 bool HasAVX2 = Subtarget->hasAVX2();
324 bool HasAVX512 = Subtarget->hasAVX512();
325 bool HasVLX = Subtarget->hasVLX();
326 bool IsNonTemporal = MMO && MMO->isNonTemporal();
327
328 // Treat i1 loads the same as i8 loads. Masking will be done when storing.
329 if (VT == MVT::i1)
330 VT = MVT::i8;
331
332 // Get opcode and regclass of the output for the given load instruction.
333 unsigned Opc = 0;
334 switch (VT.SimpleTy) {
335 default: return false;
336 case MVT::i8:
337 Opc = X86::MOV8rm;
338 break;
339 case MVT::i16:
340 Opc = X86::MOV16rm;
341 break;
342 case MVT::i32:
343 Opc = X86::MOV32rm;
344 break;
345 case MVT::i64:
346 // Must be in x86-64 mode.
347 Opc = X86::MOV64rm;
348 break;
349 case MVT::f32:
350 Opc = HasAVX512 ? X86::VMOVSSZrm_alt
351 : HasAVX ? X86::VMOVSSrm_alt
352 : HasSSE1 ? X86::MOVSSrm_alt
353 : X86::LD_Fp32m;
354 break;
355 case MVT::f64:
356 Opc = HasAVX512 ? X86::VMOVSDZrm_alt
357 : HasAVX ? X86::VMOVSDrm_alt
358 : HasSSE2 ? X86::MOVSDrm_alt
359 : X86::LD_Fp64m;
360 break;
361 case MVT::f80:
362 // No f80 support yet.
363 return false;
364 case MVT::v4f32:
365 if (IsNonTemporal && Alignment >= 16 && HasSSE41)
366 Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
367 HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
368 else if (Alignment >= 16)
369 Opc = HasVLX ? X86::VMOVAPSZ128rm :
370 HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
371 else
372 Opc = HasVLX ? X86::VMOVUPSZ128rm :
373 HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
374 break;
375 case MVT::v2f64:
376 if (IsNonTemporal && Alignment >= 16 && HasSSE41)
377 Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
378 HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
379 else if (Alignment >= 16)
380 Opc = HasVLX ? X86::VMOVAPDZ128rm :
381 HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
382 else
383 Opc = HasVLX ? X86::VMOVUPDZ128rm :
384 HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
385 break;
386 case MVT::v4i32:
387 case MVT::v2i64:
388 case MVT::v8i16:
389 case MVT::v16i8:
390 if (IsNonTemporal && Alignment >= 16 && HasSSE41)
391 Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
392 HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
393 else if (Alignment >= 16)
394 Opc = HasVLX ? X86::VMOVDQA64Z128rm :
395 HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
396 else
397 Opc = HasVLX ? X86::VMOVDQU64Z128rm :
398 HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
399 break;
400 case MVT::v8f32:
401 assert(HasAVX);
402 if (IsNonTemporal && Alignment >= 32 && HasAVX2)
403 Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
404 else if (IsNonTemporal && Alignment >= 16)
405 return false; // Force split for X86::VMOVNTDQArm
406 else if (Alignment >= 32)
407 Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
408 else
409 Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
410 break;
411 case MVT::v4f64:
412 assert(HasAVX);
413 if (IsNonTemporal && Alignment >= 32 && HasAVX2)
414 Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
415 else if (IsNonTemporal && Alignment >= 16)
416 return false; // Force split for X86::VMOVNTDQArm
417 else if (Alignment >= 32)
418 Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
419 else
420 Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
421 break;
422 case MVT::v8i32:
423 case MVT::v4i64:
424 case MVT::v16i16:
425 case MVT::v32i8:
426 assert(HasAVX);
427 if (IsNonTemporal && Alignment >= 32 && HasAVX2)
428 Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
429 else if (IsNonTemporal && Alignment >= 16)
430 return false; // Force split for X86::VMOVNTDQArm
431 else if (Alignment >= 32)
432 Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
433 else
434 Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
435 break;
436 case MVT::v16f32:
437 assert(HasAVX512);
438 if (IsNonTemporal && Alignment >= 64)
439 Opc = X86::VMOVNTDQAZrm;
440 else
441 Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
442 break;
443 case MVT::v8f64:
444 assert(HasAVX512);
445 if (IsNonTemporal && Alignment >= 64)
446 Opc = X86::VMOVNTDQAZrm;
447 else
448 Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
449 break;
450 case MVT::v8i64:
451 case MVT::v16i32:
452 case MVT::v32i16:
453 case MVT::v64i8:
454 assert(HasAVX512);
455 // Note: There are a lot more choices based on type with AVX-512, but
456 // there's really no advantage when the load isn't masked.
457 if (IsNonTemporal && Alignment >= 64)
458 Opc = X86::VMOVNTDQAZrm;
459 else
460 Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
461 break;
462 }
463
464 const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
465
466 ResultReg = createResultReg(RC);
467 MachineInstrBuilder MIB =
468 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg);
469 addFullAddress(MIB, AM);
470 if (MMO)
471 MIB->addMemOperand(*FuncInfo.MF, MMO);
472 return true;
473}
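// Example of the opcode choice above (sketch): a non-temporal, 16-byte
// aligned load of <4 x float> on an SSE4.1-only target selects
// X86::MOVNTDQArm; the same load without the non-temporal hint selects
// X86::MOVAPSrm, and an unaligned load falls back to X86::MOVUPSrm.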
474
475/// X86FastEmitStore - Emit a machine instruction to store a value Val of
476/// type VT. The address is either pre-computed, consisting of a base ptr, Ptr,
477/// and a displacement offset, or a GlobalAddress,
478/// i.e. V. Return true if it is possible.
479bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM,
480 MachineMemOperand *MMO, bool Aligned) {
481 bool HasSSE1 = Subtarget->hasSSE1();
482 bool HasSSE2 = Subtarget->hasSSE2();
483 bool HasSSE4A = Subtarget->hasSSE4A();
484 bool HasAVX = Subtarget->hasAVX();
485 bool HasAVX512 = Subtarget->hasAVX512();
486 bool HasVLX = Subtarget->hasVLX();
487 bool IsNonTemporal = MMO && MMO->isNonTemporal();
488
489 // Get opcode and regclass of the output for the given store instruction.
490 unsigned Opc = 0;
491 switch (VT.getSimpleVT().SimpleTy) {
492 case MVT::f80: // No f80 support yet.
493 default: return false;
494 case MVT::i1: {
495 // Mask out all but lowest bit.
496 Register AndResult = createResultReg(&X86::GR8RegClass);
497 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
498 TII.get(X86::AND8ri), AndResult)
499 .addReg(ValReg).addImm(1);
500 ValReg = AndResult;
501 [[fallthrough]]; // handle i1 as i8.
502 }
503 case MVT::i8: Opc = X86::MOV8mr; break;
504 case MVT::i16: Opc = X86::MOV16mr; break;
505 case MVT::i32:
506 Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
507 break;
508 case MVT::i64:
509 // Must be in x86-64 mode.
510 Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
511 break;
512 case MVT::f32:
513 if (HasSSE1) {
514 if (IsNonTemporal && HasSSE4A)
515 Opc = X86::MOVNTSS;
516 else
517 Opc = HasAVX512 ? X86::VMOVSSZmr :
518 HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
519 } else
520 Opc = X86::ST_Fp32m;
521 break;
522 case MVT::f64:
523 if (HasSSE2) {
524 if (IsNonTemporal && HasSSE4A)
525 Opc = X86::MOVNTSD;
526 else
527 Opc = HasAVX512 ? X86::VMOVSDZmr :
528 HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
529 } else
530 Opc = X86::ST_Fp64m;
531 break;
532 case MVT::x86mmx:
533 Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
534 break;
535 case MVT::v4f32:
536 if (Aligned) {
537 if (IsNonTemporal)
538 Opc = HasVLX ? X86::VMOVNTPSZ128mr :
539 HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
540 else
541 Opc = HasVLX ? X86::VMOVAPSZ128mr :
542 HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
543 } else
544 Opc = HasVLX ? X86::VMOVUPSZ128mr :
545 HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
546 break;
547 case MVT::v2f64:
548 if (Aligned) {
549 if (IsNonTemporal)
550 Opc = HasVLX ? X86::VMOVNTPDZ128mr :
551 HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
552 else
553 Opc = HasVLX ? X86::VMOVAPDZ128mr :
554 HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
555 } else
556 Opc = HasVLX ? X86::VMOVUPDZ128mr :
557 HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
558 break;
559 case MVT::v4i32:
560 case MVT::v2i64:
561 case MVT::v8i16:
562 case MVT::v16i8:
563 if (Aligned) {
564 if (IsNonTemporal)
565 Opc = HasVLX ? X86::VMOVNTDQZ128mr :
566 HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
567 else
568 Opc = HasVLX ? X86::VMOVDQA64Z128mr :
569 HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
570 } else
571 Opc = HasVLX ? X86::VMOVDQU64Z128mr :
572 HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
573 break;
574 case MVT::v8f32:
575 assert(HasAVX);
576 if (Aligned) {
577 if (IsNonTemporal)
578 Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
579 else
580 Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
581 } else
582 Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
583 break;
584 case MVT::v4f64:
585 assert(HasAVX);
586 if (Aligned) {
587 if (IsNonTemporal)
588 Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
589 else
590 Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
591 } else
592 Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
593 break;
594 case MVT::v8i32:
595 case MVT::v4i64:
596 case MVT::v16i16:
597 case MVT::v32i8:
598 assert(HasAVX);
599 if (Aligned) {
600 if (IsNonTemporal)
601 Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
602 else
603 Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
604 } else
605 Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
606 break;
607 case MVT::v16f32:
608 assert(HasAVX512);
609 if (Aligned)
610 Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
611 else
612 Opc = X86::VMOVUPSZmr;
613 break;
614 case MVT::v8f64:
615 assert(HasAVX512);
616 if (Aligned) {
617 Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
618 } else
619 Opc = X86::VMOVUPDZmr;
620 break;
621 case MVT::v8i64:
622 case MVT::v16i32:
623 case MVT::v32i16:
624 case MVT::v64i8:
625 assert(HasAVX512);
626 // Note: There are a lot more choices based on type with AVX-512, but
627 // there's really no advantage when the store isn't masked.
628 if (Aligned)
629 Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
630 else
631 Opc = X86::VMOVDQU64Zmr;
632 break;
633 }
634
635 const MCInstrDesc &Desc = TII.get(Opc);
636 // Some of the instructions in the previous switch use FR128 instead
637 // of FR32 for ValReg. Make sure the register we feed the instruction
638 // matches its register class constraints.
639 // Note: This is fine to do a copy from FR32 to FR128, this is the
640 // same registers behind the scene and actually why it did not trigger
641 // any bugs before.
642 ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
643 MachineInstrBuilder MIB =
644 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, Desc);
645 addFullAddress(MIB, AM).addReg(ValReg);
646 if (MMO)
647 MIB->addMemOperand(*FuncInfo.MF, MMO);
648
649 return true;
650}
651
652bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
653 X86AddressMode &AM,
654 MachineMemOperand *MMO, bool Aligned) {
655 // Handle 'null' like i32/i64 0.
656 if (isa<ConstantPointerNull>(Val))
657 Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
658
659 // If this is a store of a simple constant, fold the constant into the store.
660 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
661 unsigned Opc = 0;
662 bool Signed = true;
663 switch (VT.getSimpleVT().SimpleTy) {
664 default: break;
665 case MVT::i1:
666 Signed = false;
667 [[fallthrough]]; // Handle as i8.
668 case MVT::i8: Opc = X86::MOV8mi; break;
669 case MVT::i16: Opc = X86::MOV16mi; break;
670 case MVT::i32: Opc = X86::MOV32mi; break;
671 case MVT::i64:
672 // Must be a 32-bit sign extended value.
673 if (isInt<32>(CI->getSExtValue()))
674 Opc = X86::MOV64mi32;
675 break;
676 }
677
678 if (Opc) {
679 MachineInstrBuilder MIB =
680 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc));
681 addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
682 : CI->getZExtValue());
683 if (MMO)
684 MIB->addMemOperand(*FuncInfo.MF, MMO);
685 return true;
686 }
687 }
688
689 Register ValReg = getRegForValue(Val);
690 if (ValReg == 0)
691 return false;
692
693 return X86FastEmitStore(VT, ValReg, AM, MMO, Aligned);
694}
695
696/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
697/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
698/// ISD::SIGN_EXTEND).
699bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
700 unsigned Src, EVT SrcVT,
701 unsigned &ResultReg) {
702 unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
703 if (RR == 0)
704 return false;
705
706 ResultReg = RR;
707 return true;
708}
709
710bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
711 // Handle constant address.
712 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
713 // Can't handle alternate code models yet.
714 if (TM.getCodeModel() != CodeModel::Small)
715 return false;
716
717 // Can't handle TLS yet.
718 if (GV->isThreadLocal())
719 return false;
720
721 // Can't handle !absolute_symbol references yet.
722 if (GV->isAbsoluteSymbolRef())
723 return false;
724
725 // RIP-relative addresses can't have additional register operands, so if
726 // we've already folded stuff into the addressing mode, just force the
727 // global value into its own register, which we can use as the basereg.
728 if (!Subtarget->isPICStyleRIPRel() ||
729 (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
730 // Okay, we've committed to selecting this global. Set up the address.
731 AM.GV = GV;
732
733 // Allow the subtarget to classify the global.
734 unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
735
736 // If this reference is relative to the pic base, set it now.
737 if (isGlobalRelativeToPICBase(GVFlags)) {
738 // FIXME: How do we know Base.Reg is free??
739 AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
740 }
741
742 // Unless the ABI requires an extra load, return a direct reference to
743 // the global.
744 if (!isGlobalStubReference(GVFlags)) {
745 if (Subtarget->isPICStyleRIPRel()) {
746 // Use rip-relative addressing if we can. Above we verified that the
747 // base and index registers are unused.
748 assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
749 AM.Base.Reg = X86::RIP;
750 }
751 AM.GVOpFlags = GVFlags;
752 return true;
753 }
754
755 // Ok, we need to do a load from a stub. If we've already loaded from
756 // this stub, reuse the loaded pointer, otherwise emit the load now.
757 DenseMap<const Value *, Register>::iterator I = LocalValueMap.find(V);
758 Register LoadReg;
759 if (I != LocalValueMap.end() && I->second) {
760 LoadReg = I->second;
761 } else {
762 // Issue load from stub.
763 unsigned Opc = 0;
764 const TargetRegisterClass *RC = nullptr;
765 X86AddressMode StubAM;
766 StubAM.Base.Reg = AM.Base.Reg;
767 StubAM.GV = GV;
768 StubAM.GVOpFlags = GVFlags;
769
770 // Prepare for inserting code in the local-value area.
771 SavePoint SaveInsertPt = enterLocalValueArea();
772
773 if (TLI.getPointerTy(DL) == MVT::i64) {
774 Opc = X86::MOV64rm;
775 RC = &X86::GR64RegClass;
776 } else {
777 Opc = X86::MOV32rm;
778 RC = &X86::GR32RegClass;
779 }
780
781 if (Subtarget->isPICStyleRIPRel() || GVFlags == X86II::MO_GOTPCREL ||
782 GVFlags == X86II::MO_COFFSTUB)
783 StubAM.Base.Reg = X86::RIP;
784
785 LoadReg = createResultReg(RC);
786 MachineInstrBuilder LoadMI =
787 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), LoadReg);
788 addFullAddress(LoadMI, StubAM);
789
790 // Ok, back to normal mode.
791 leaveLocalValueArea(SaveInsertPt);
792
793 // Prevent loading GV stub multiple times in same MBB.
794 LocalValueMap[V] = LoadReg;
795 }
796
797 // Now construct the final address. Note that the Disp, Scale,
798 // and Index values may already be set here.
799 AM.Base.Reg = LoadReg;
800 AM.GV = nullptr;
801 return true;
802 }
803 }
804
805 // If all else fails, try to materialize the value in a register.
806 if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
807 if (AM.Base.Reg == 0) {
808 AM.Base.Reg = getRegForValue(V);
809 return AM.Base.Reg != 0;
810 }
811 if (AM.IndexReg == 0) {
812 assert(AM.Scale == 1 && "Scale with no index!");
813 AM.IndexReg = getRegForValue(V);
814 return AM.IndexReg != 0;
815 }
816 }
817
818 return false;
819}
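// Example (sketch): with RIP-relative PIC on x86-64, a global that needs a
// GOT stub is handled above by emitting a single MOV64rm from
// [rip + symbol@GOTPCREL] in the local-value area; the loaded pointer is
// cached in LocalValueMap so later uses of the same global in this block
// reuse LoadReg as the base register instead of reloading the stub.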
820
821/// X86SelectAddress - Attempt to fill in an address from the given value.
822///
823bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
824 SmallVector<const Value *, 32> GEPs;
825redo_gep:
826 const User *U = nullptr;
827 unsigned Opcode = Instruction::UserOp1;
828 if (const Instruction *I = dyn_cast<Instruction>(V)) {
829 // Don't walk into other basic blocks; it's possible we haven't
830 // visited them yet, so the instructions may not yet be assigned
831 // virtual registers.
832 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
833 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
834 Opcode = I->getOpcode();
835 U = I;
836 }
837 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
838 Opcode = C->getOpcode();
839 U = C;
840 }
841
842 if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
843 if (Ty->getAddressSpace() > 255)
844 // Fast instruction selection doesn't support the special
845 // address spaces.
846 return false;
847
848 switch (Opcode) {
849 default: break;
850 case Instruction::BitCast:
851 // Look past bitcasts.
852 return X86SelectAddress(U->getOperand(0), AM);
853
854 case Instruction::IntToPtr:
855 // Look past no-op inttoptrs.
856 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
857 TLI.getPointerTy(DL))
858 return X86SelectAddress(U->getOperand(0), AM);
859 break;
860
861 case Instruction::PtrToInt:
862 // Look past no-op ptrtoints.
863 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
864 return X86SelectAddress(U->getOperand(0), AM);
865 break;
866
867 case Instruction::Alloca: {
868 // Do static allocas.
869 const AllocaInst *A = cast<AllocaInst>(V);
870 DenseMap<const AllocaInst *, int>::iterator SI =
871 FuncInfo.StaticAllocaMap.find(A);
872 if (SI != FuncInfo.StaticAllocaMap.end()) {
873 AM.BaseType = X86AddressMode::FrameIndexBase;
874 AM.Base.FrameIndex = SI->second;
875 return true;
876 }
877 break;
878 }
879
880 case Instruction::Add: {
881 // Adds of constants are common and easy enough.
882 if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
883 uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
884 // They have to fit in the 32-bit signed displacement field though.
885 if (isInt<32>(Disp)) {
886 AM.Disp = (uint32_t)Disp;
887 return X86SelectAddress(U->getOperand(0), AM);
888 }
889 }
890 break;
891 }
892
893 case Instruction::GetElementPtr: {
894 X86AddressMode SavedAM = AM;
895
896 // Pattern-match simple GEPs.
897 uint64_t Disp = (int32_t)AM.Disp;
898 unsigned IndexReg = AM.IndexReg;
899 unsigned Scale = AM.Scale;
900 gep_type_iterator GTI = gep_type_begin(U);
901 // Iterate through the indices, folding what we can. Constants can be
902 // folded, and one dynamic index can be handled, if the scale is supported.
903 for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
904 i != e; ++i, ++GTI) {
905 const Value *Op = *i;
906 if (StructType *STy = GTI.getStructTypeOrNull()) {
907 const StructLayout *SL = DL.getStructLayout(STy);
908 Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
909 continue;
910 }
911
912 // An array/variable index is always of the form i*S where S is the
913 // constant scale size. See if we can push the scale into immediates.
914 uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
915 for (;;) {
916 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
917 // Constant-offset addressing.
918 Disp += CI->getSExtValue() * S;
919 break;
920 }
921 if (canFoldAddIntoGEP(U, Op)) {
922 // A compatible add with a constant operand. Fold the constant.
923 ConstantInt *CI =
924 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
925 Disp += CI->getSExtValue() * S;
926 // Iterate on the other operand.
927 Op = cast<AddOperator>(Op)->getOperand(0);
928 continue;
929 }
930 if (IndexReg == 0 &&
931 (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
932 (S == 1 || S == 2 || S == 4 || S == 8)) {
933 // Scaled-index addressing.
934 Scale = S;
935 IndexReg = getRegForGEPIndex(Op);
936 if (IndexReg == 0)
937 return false;
938 break;
939 }
940 // Unsupported.
941 goto unsupported_gep;
942 }
943 }
944
945 // Check for displacement overflow.
946 if (!isInt<32>(Disp))
947 break;
948
949 AM.IndexReg = IndexReg;
950 AM.Scale = Scale;
951 AM.Disp = (uint32_t)Disp;
952 GEPs.push_back(V);
953
954 if (const GetElementPtrInst *GEP =
955 dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
956 // Ok, the GEP indices were covered by constant-offset and scaled-index
957 // addressing. Update the address state and move on to examining the base.
958 V = GEP;
959 goto redo_gep;
960 } else if (X86SelectAddress(U->getOperand(0), AM)) {
961 return true;
962 }
963
964 // If we couldn't merge the gep value into this addr mode, revert back to
965 // our address and just match the value instead of completely failing.
966 AM = SavedAM;
967
968 for (const Value *I : reverse(GEPs))
969 if (handleConstantAddresses(I, AM))
970 return true;
971
972 return false;
973 unsupported_gep:
974 // Ok, the GEP indices weren't all covered.
975 break;
976 }
977 }
978
979 return handleConstantAddresses(V, AM);
980}
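// Example (sketch): for IR such as
//   %p = getelementptr [16 x i32], ptr %base, i64 0, i64 %i
//   %v = load i32, ptr %p
// the GetElementPtr case above folds the whole address into one
// X86AddressMode of roughly [%base + %i*4 + 0], so the load can be selected
// as a single MOV32rm.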
981
982/// X86SelectCallAddress - Attempt to fill in an address from the given value.
983///
984bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
985 const User *U = nullptr;
986 unsigned Opcode = Instruction::UserOp1;
987 const Instruction *I = dyn_cast<Instruction>(V);
988 // Record if the value is defined in the same basic block.
989 //
990 // This information is crucial to know whether or not folding an
991 // operand is valid.
992 // Indeed, FastISel generates or reuses a virtual register for all
993 // operands of all instructions it selects. Obviously, the definition and
994 // its uses must use the same virtual register otherwise the produced
995 // code is incorrect.
996 // Before instruction selection, FunctionLoweringInfo::set sets the virtual
997 // registers for values that are alive across basic blocks. This ensures
998 // that the values are consistently set across basic blocks, even
999 // if different instruction selection mechanisms are used (e.g., a mix of
1000 // SDISel and FastISel).
1001 // For values local to a basic block, the instruction selection process
1002 // generates these virtual registers with whatever method is appropriate
1003 // for its needs. In particular, FastISel and SDISel do not share the way
1004 // local virtual registers are set.
1005 // Therefore, it is impossible (or at least unsafe) to share values
1006 // between basic blocks unless they use the same instruction selection
1007 // method, which is not guaranteed for X86.
1008 // Moreover, things like hasOneUse cannot be used accurately if we
1009 // allow references to values across basic blocks when they are not
1010 // alive across basic blocks initially.
1011 bool InMBB = true;
1012 if (I) {
1013 Opcode = I->getOpcode();
1014 U = I;
1015 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
1016 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
1017 Opcode = C->getOpcode();
1018 U = C;
1019 }
1020
1021 switch (Opcode) {
1022 default: break;
1023 case Instruction::BitCast:
1024 // Look past bitcasts if its operand is in the same BB.
1025 if (InMBB)
1026 return X86SelectCallAddress(U->getOperand(0), AM);
1027 break;
1028
1029 case Instruction::IntToPtr:
1030 // Look past no-op inttoptrs if its operand is in the same BB.
1031 if (InMBB &&
1032 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
1033 TLI.getPointerTy(DL))
1034 return X86SelectCallAddress(U->getOperand(0), AM);
1035 break;
1036
1037 case Instruction::PtrToInt:
1038 // Look past no-op ptrtoints if its operand is in the same BB.
1039 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
1040 return X86SelectCallAddress(U->getOperand(0), AM);
1041 break;
1042 }
1043
1044 // Handle constant address.
1045 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
1046 // Can't handle alternate code models yet.
1047 if (TM.getCodeModel() != CodeModel::Small)
1048 return false;
1049
1050 // RIP-relative addresses can't have additional register operands.
1051 if (Subtarget->isPICStyleRIPRel() &&
1052 (AM.Base.Reg != 0 || AM.IndexReg != 0))
1053 return false;
1054
1055 // Can't handle TLS.
1056 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
1057 if (GVar->isThreadLocal())
1058 return false;
1059
1060 // Okay, we've committed to selecting this global. Set up the basic address.
1061 AM.GV = GV;
1062
1063 // Return a direct reference to the global. Fastisel can handle calls to
1064 // functions that require loads, such as dllimport and nonlazybind
1065 // functions.
1066 if (Subtarget->isPICStyleRIPRel()) {
1067 // Use rip-relative addressing if we can. Above we verified that the
1068 // base and index registers are unused.
1069 assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
1070 AM.Base.Reg = X86::RIP;
1071 } else {
1072 AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
1073 }
1074
1075 return true;
1076 }
1077
1078 // If all else fails, try to materialize the value in a register.
1079 if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
1080 auto GetCallRegForValue = [this](const Value *V) {
1081 Register Reg = getRegForValue(V);
1082
1083 // In 64-bit mode, we need a 64-bit register even if pointers are 32 bits.
1084 if (Reg && Subtarget->isTarget64BitILP32()) {
1085 Register CopyReg = createResultReg(&X86::GR32RegClass);
1086 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV32rr),
1087 CopyReg)
1088 .addReg(Reg);
1089
1090 Register ExtReg = createResultReg(&X86::GR64RegClass);
1091 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1092 TII.get(TargetOpcode::SUBREG_TO_REG), ExtReg)
1093 .addImm(0)
1094 .addReg(CopyReg)
1095 .addImm(X86::sub_32bit);
1096 Reg = ExtReg;
1097 }
1098
1099 return Reg;
1100 };
1101
1102 if (AM.Base.Reg == 0) {
1103 AM.Base.Reg = GetCallRegForValue(V);
1104 return AM.Base.Reg != 0;
1105 }
1106 if (AM.IndexReg == 0) {
1107 assert(AM.Scale == 1 && "Scale with no index!");
1108 AM.IndexReg = GetCallRegForValue(V);
1109 return AM.IndexReg != 0;
1110 }
1111 }
1112
1113 return false;
1114}
1115
1116
1117/// X86SelectStore - Select and emit code to implement store instructions.
1118bool X86FastISel::X86SelectStore(const Instruction *I) {
1119 // Atomic stores need special handling.
1120 const StoreInst *S = cast<StoreInst>(I);
1121
1122 if (S->isAtomic())
1123 return false;
1124
1125 const Value *PtrV = I->getOperand(1);
1126 if (TLI.supportSwiftError()) {
1127 // Swifterror values can come from either a function parameter with
1128 // swifterror attribute or an alloca with swifterror attribute.
1129 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
1130 if (Arg->hasSwiftErrorAttr())
1131 return false;
1132 }
1133
1134 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
1135 if (Alloca->isSwiftError())
1136 return false;
1137 }
1138 }
1139
1140 const Value *Val = S->getValueOperand();
1141 const Value *Ptr = S->getPointerOperand();
1142
1143 MVT VT;
1144 if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
1145 return false;
1146
1147 Align Alignment = S->getAlign();
1148 Align ABIAlignment = DL.getABITypeAlign(Val->getType());
1149 bool Aligned = Alignment >= ABIAlignment;
1150
1151 X86AddressMode AM;
1152 if (!X86SelectAddress(Ptr, AM))
1153 return false;
1154
1155 return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
1156}
1157
1158/// X86SelectRet - Select and emit code to implement ret instructions.
1159bool X86FastISel::X86SelectRet(const Instruction *I) {
1160 const ReturnInst *Ret = cast<ReturnInst>(I);
1161 const Function &F = *I->getParent()->getParent();
1162 const X86MachineFunctionInfo *X86MFInfo =
1163 FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
1164
1165 if (!FuncInfo.CanLowerReturn)
1166 return false;
1167
1168 if (TLI.supportSwiftError() &&
1169 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
1170 return false;
1171
1172 if (TLI.supportSplitCSR(FuncInfo.MF))
1173 return false;
1174
1175 CallingConv::ID CC = F.getCallingConv();
1176 if (CC != CallingConv::C &&
1177 CC != CallingConv::Fast &&
1178 CC != CallingConv::Tail &&
1185 return false;
1186
1187 // Don't handle popping bytes if they don't fit the ret's immediate.
1188 if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
1189 return false;
1190
1191 // fastcc with -tailcallopt is intended to provide a guaranteed
1192 // tail call optimization. Fastisel doesn't know how to do that.
1193 if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
1194 CC == CallingConv::Tail || CC == CallingConv::SwiftTail)
1195 return false;
1196
1197 // Let SDISel handle vararg functions.
1198 if (F.isVarArg())
1199 return false;
1200
1201 // Build a list of return value registers.
1202 SmallVector<unsigned, 4> RetRegs;
1203
1204 if (Ret->getNumOperands() > 0) {
1205 SmallVector<ISD::OutputArg, 4> Outs;
1206 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1207
1208 // Analyze operands of the call, assigning locations to each operand.
1209 SmallVector<CCValAssign, 16> ValLocs;
1210 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
1211 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
1212
1213 const Value *RV = Ret->getOperand(0);
1214 Register Reg = getRegForValue(RV);
1215 if (Reg == 0)
1216 return false;
1217
1218 // Only handle a single return value for now.
1219 if (ValLocs.size() != 1)
1220 return false;
1221
1222 CCValAssign &VA = ValLocs[0];
1223
1224 // Don't bother handling odd stuff for now.
1225 if (VA.getLocInfo() != CCValAssign::Full)
1226 return false;
1227 // Only handle register returns for now.
1228 if (!VA.isRegLoc())
1229 return false;
1230
1231 // The calling-convention tables for x87 returns don't tell
1232 // the whole story.
1233 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
1234 return false;
1235
1236 unsigned SrcReg = Reg + VA.getValNo();
1237 EVT SrcVT = TLI.getValueType(DL, RV->getType());
1238 EVT DstVT = VA.getValVT();
1239 // Special handling for extended integers.
1240 if (SrcVT != DstVT) {
1241 if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
1242 return false;
1243
1244 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
1245 return false;
1246
1247 assert(DstVT == MVT::i32 && "X86 should always ext to i32");
1248
1249 if (SrcVT == MVT::i1) {
1250 if (Outs[0].Flags.isSExt())
1251 return false;
1252 // TODO
1253 SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg);
1254 SrcVT = MVT::i8;
1255 }
1256 unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
1257 ISD::SIGN_EXTEND;
1258 // TODO
1259 SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op, SrcReg);
1260 }
1261
1262 // Make the copy.
1263 Register DstReg = VA.getLocReg();
1264 const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
1265 // Avoid a cross-class copy. This is very unlikely.
1266 if (!SrcRC->contains(DstReg))
1267 return false;
1268 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1269 TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
1270
1271 // Add register to return instruction.
1272 RetRegs.push_back(VA.getLocReg());
1273 }
1274
1275 // Swift calling convention does not require we copy the sret argument
1276 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
1277
1278 // All x86 ABIs require that for returning structs by value we copy
1279 // the sret argument into %rax/%eax (depending on ABI) for the return.
1280 // We saved the argument into a virtual register in the entry block,
1281 // so now we copy the value out and into %rax/%eax.
1282 if (F.hasStructRetAttr() && CC != CallingConv::Swift &&
1283 CC != CallingConv::SwiftTail) {
1284 Register Reg = X86MFInfo->getSRetReturnReg();
1285 assert(Reg &&
1286 "SRetReturnReg should have been set in LowerFormalArguments()!");
1287 unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
1288 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1289 TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
1290 RetRegs.push_back(RetReg);
1291 }
1292
1293 // Now emit the RET.
1294 MachineInstrBuilder MIB;
1295 if (X86MFInfo->getBytesToPopOnReturn()) {
1296 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1297 TII.get(Subtarget->is64Bit() ? X86::RETI64 : X86::RETI32))
1298 .addImm(X86MFInfo->getBytesToPopOnReturn());
1299 } else {
1300 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1301 TII.get(Subtarget->is64Bit() ? X86::RET64 : X86::RET32));
1302 }
1303 for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1304 MIB.addReg(RetRegs[i], RegState::Implicit);
1305 return true;
1306}
1307
1308/// X86SelectLoad - Select and emit code to implement load instructions.
1309///
1310bool X86FastISel::X86SelectLoad(const Instruction *I) {
1311 const LoadInst *LI = cast<LoadInst>(I);
1312
1313 // Atomic loads need special handling.
1314 if (LI->isAtomic())
1315 return false;
1316
1317 const Value *SV = I->getOperand(0);
1318 if (TLI.supportSwiftError()) {
1319 // Swifterror values can come from either a function parameter with
1320 // swifterror attribute or an alloca with swifterror attribute.
1321 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1322 if (Arg->hasSwiftErrorAttr())
1323 return false;
1324 }
1325
1326 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1327 if (Alloca->isSwiftError())
1328 return false;
1329 }
1330 }
1331
1332 MVT VT;
1333 if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
1334 return false;
1335
1336 const Value *Ptr = LI->getPointerOperand();
1337
1338 X86AddressMode AM;
1339 if (!X86SelectAddress(Ptr, AM))
1340 return false;
1341
1342 unsigned ResultReg = 0;
1343 if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
1344 LI->getAlign().value()))
1345 return false;
1346
1347 updateValueMap(I, ResultReg);
1348 return true;
1349}
1350
1351static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
1352 bool HasAVX512 = Subtarget->hasAVX512();
1353 bool HasAVX = Subtarget->hasAVX();
1354 bool HasSSE1 = Subtarget->hasSSE1();
1355 bool HasSSE2 = Subtarget->hasSSE2();
1356
1357 switch (VT.getSimpleVT().SimpleTy) {
1358 default: return 0;
1359 case MVT::i8: return X86::CMP8rr;
1360 case MVT::i16: return X86::CMP16rr;
1361 case MVT::i32: return X86::CMP32rr;
1362 case MVT::i64: return X86::CMP64rr;
1363 case MVT::f32:
1364 return HasAVX512 ? X86::VUCOMISSZrr
1365 : HasAVX ? X86::VUCOMISSrr
1366 : HasSSE1 ? X86::UCOMISSrr
1367 : 0;
1368 case MVT::f64:
1369 return HasAVX512 ? X86::VUCOMISDZrr
1370 : HasAVX ? X86::VUCOMISDrr
1371 : HasSSE2 ? X86::UCOMISDrr
1372 : 0;
1373 }
1374}
1375
1376/// If we have a comparison whose RHS is the constant RHSC, return an opcode
1377/// that works for the compare (e.g. CMP32ri); otherwise return 0.
1378static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
1379 int64_t Val = RHSC->getSExtValue();
1380 switch (VT.getSimpleVT().SimpleTy) {
1381 // Otherwise, we can't fold the immediate into this comparison.
1382 default:
1383 return 0;
1384 case MVT::i8:
1385 return X86::CMP8ri;
1386 case MVT::i16:
1387 if (isInt<8>(Val))
1388 return X86::CMP16ri8;
1389 return X86::CMP16ri;
1390 case MVT::i32:
1391 if (isInt<8>(Val))
1392 return X86::CMP32ri8;
1393 return X86::CMP32ri;
1394 case MVT::i64:
1395 if (isInt<8>(Val))
1396 return X86::CMP64ri8;
1397 // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
1398 // field.
1399 if (isInt<32>(Val))
1400 return X86::CMP64ri32;
1401 return 0;
1402 }
1403}
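// Example (sketch): an i32 compare against 5 returns X86::CMP32ri8 because
// the immediate fits in a sign-extended 8-bit field, while a compare against
// 100000 returns X86::CMP32ri; an i64 compare whose immediate does not fit in
// a signed 32-bit field returns 0, and the caller falls back to a
// register-register compare.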
1404
1405bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
1406 const DebugLoc &CurMIMD) {
1407 Register Op0Reg = getRegForValue(Op0);
1408 if (Op0Reg == 0) return false;
1409
1410 // Handle 'null' like i32/i64 0.
1411 if (isa<ConstantPointerNull>(Op1))
1412 Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
1413
1414 // We have two options: compare with register or immediate. If the RHS of
1415 // the compare is an immediate that we can fold into this compare, use
1416 // CMPri, otherwise use CMPrr.
1417 if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1418 if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
1419 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurMIMD, TII.get(CompareImmOpc))
1420 .addReg(Op0Reg)
1421 .addImm(Op1C->getSExtValue());
1422 return true;
1423 }
1424 }
1425
1426 unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
1427 if (CompareOpc == 0) return false;
1428
1429 Register Op1Reg = getRegForValue(Op1);
1430 if (Op1Reg == 0) return false;
1431 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurMIMD, TII.get(CompareOpc))
1432 .addReg(Op0Reg)
1433 .addReg(Op1Reg);
1434
1435 return true;
1436}
1437
1438bool X86FastISel::X86SelectCmp(const Instruction *I) {
1439 const CmpInst *CI = cast<CmpInst>(I);
1440
1441 MVT VT;
1442 if (!isTypeLegal(I->getOperand(0)->getType(), VT))
1443 return false;
1444
1445 // Below code only works for scalars.
1446 if (VT.isVector())
1447 return false;
1448
1449 // Try to optimize or fold the cmp.
1450 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1451 unsigned ResultReg = 0;
1452 switch (Predicate) {
1453 default: break;
1454 case CmpInst::FCMP_FALSE: {
1455 ResultReg = createResultReg(&X86::GR32RegClass);
1456 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV32r0),
1457 ResultReg);
1458 ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, X86::sub_8bit);
1459 if (!ResultReg)
1460 return false;
1461 break;
1462 }
1463 case CmpInst::FCMP_TRUE: {
1464 ResultReg = createResultReg(&X86::GR8RegClass);
1465 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV8ri),
1466 ResultReg).addImm(1);
1467 break;
1468 }
1469 }
1470
1471 if (ResultReg) {
1472 updateValueMap(I, ResultReg);
1473 return true;
1474 }
1475
1476 const Value *LHS = CI->getOperand(0);
1477 const Value *RHS = CI->getOperand(1);
1478
1479 // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1480 // We don't have to materialize a zero constant for this case and can just use
1481 // %x again on the RHS.
1482 if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1483 const auto *RHSC = dyn_cast<ConstantFP>(RHS);
1484 if (RHSC && RHSC->isNullValue())
1485 RHS = LHS;
1486 }
1487
1488 // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
1489 static const uint16_t SETFOpcTable[2][3] = {
1490 { X86::COND_E, X86::COND_NP, X86::AND8rr },
1491 { X86::COND_NE, X86::COND_P, X86::OR8rr }
1492 };
1493 const uint16_t *SETFOpc = nullptr;
1494 switch (Predicate) {
1495 default: break;
1496 case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
1497 case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
1498 }
1499
1500 ResultReg = createResultReg(&X86::GR8RegClass);
1501 if (SETFOpc) {
1502 if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1503 return false;
1504
1505 Register FlagReg1 = createResultReg(&X86::GR8RegClass);
1506 Register FlagReg2 = createResultReg(&X86::GR8RegClass);
1507 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
1508 FlagReg1).addImm(SETFOpc[0]);
1509 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
1510 FlagReg2).addImm(SETFOpc[1]);
1511 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(SETFOpc[2]),
1512 ResultReg).addReg(FlagReg1).addReg(FlagReg2);
1513 updateValueMap(I, ResultReg);
1514 return true;
1515 }
1516
1517 X86::CondCode CC;
1518 bool SwapArgs;
1519 std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1520 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1521
1522 if (SwapArgs)
1523 std::swap(LHS, RHS);
1524
1525 // Emit a compare of LHS/RHS.
1526 if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
1527 return false;
1528
1529 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
1530 ResultReg).addImm(CC);
1531 updateValueMap(I, ResultReg);
1532 return true;
1533}
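// Example (sketch): 'fcmp oeq double %a, %b' cannot be answered by a single
// condition code, so the path above emits a UCOMISD, then SETCCr with COND_E
// and COND_NP into two GR8 temporaries, and combines them with AND8rr;
// 'fcmp une' uses COND_NE/COND_P combined with OR8rr instead, as encoded in
// SETFOpcTable.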
1534
1535bool X86FastISel::X86SelectZExt(const Instruction *I) {
1536 EVT DstVT = TLI.getValueType(DL, I->getType());
1537 if (!TLI.isTypeLegal(DstVT))
1538 return false;
1539
1540 Register ResultReg = getRegForValue(I->getOperand(0));
1541 if (ResultReg == 0)
1542 return false;
1543
1544 // Handle zero-extension from i1 to i8, which is common.
1545 MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1546 if (SrcVT == MVT::i1) {
1547 // Set the high bits to zero.
1548 ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg);
1549 SrcVT = MVT::i8;
1550
1551 if (ResultReg == 0)
1552 return false;
1553 }
1554
1555 if (DstVT == MVT::i64) {
1556 // Handle extension to 64-bits via sub-register shenanigans.
1557 unsigned MovInst;
1558
1559 switch (SrcVT.SimpleTy) {
1560 case MVT::i8: MovInst = X86::MOVZX32rr8; break;
1561 case MVT::i16: MovInst = X86::MOVZX32rr16; break;
1562 case MVT::i32: MovInst = X86::MOV32rr; break;
1563 default: llvm_unreachable("Unexpected zext to i64 source type");
1564 }
1565
1566 Register Result32 = createResultReg(&X86::GR32RegClass);
1567 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(MovInst), Result32)
1568 .addReg(ResultReg);
1569
1570 ResultReg = createResultReg(&X86::GR64RegClass);
1571 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::SUBREG_TO_REG),
1572 ResultReg)
1573 .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
1574 } else if (DstVT == MVT::i16) {
1575 // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
1576 // extend to 32-bits and then extract down to 16-bits.
1577 Register Result32 = createResultReg(&X86::GR32RegClass);
1578 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOVZX32rr8),
1579 Result32).addReg(ResultReg);
1580
1581 ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit);
1582 } else if (DstVT != MVT::i8) {
1583 ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1584 ResultReg);
1585 if (ResultReg == 0)
1586 return false;
1587 }
1588
1589 updateValueMap(I, ResultReg);
1590 return true;
1591}
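// Example (sketch): 'zext i8 %x to i64' is emitted above as MOVZX32rr8 into a
// 32-bit register followed by SUBREG_TO_REG into a GR64, relying on the fact
// that writes to a 32-bit register implicitly zero the upper 32 bits in
// 64-bit mode.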
1592
1593bool X86FastISel::X86SelectSExt(const Instruction *I) {
1594 EVT DstVT = TLI.getValueType(DL, I->getType());
1595 if (!TLI.isTypeLegal(DstVT))
1596 return false;
1597
1598 Register ResultReg = getRegForValue(I->getOperand(0));
1599 if (ResultReg == 0)
1600 return false;
1601
1602 // Handle sign-extension from i1 to i8.
1603 MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1604 if (SrcVT == MVT::i1) {
1605 // Set the high bits to zero.
1606 Register ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg);
1607 if (ZExtReg == 0)
1608 return false;
1609
1610 // Negate the result to make an 8-bit sign extended value.
1611 ResultReg = createResultReg(&X86::GR8RegClass);
1612 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::NEG8r),
1613 ResultReg).addReg(ZExtReg);
1614
1615 SrcVT = MVT::i8;
1616 }
1617
1618 if (DstVT == MVT::i16) {
1619 // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
1620 // extend to 32-bits and then extract down to 16-bits.
1621 Register Result32 = createResultReg(&X86::GR32RegClass);
1622 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOVSX32rr8),
1623 Result32).addReg(ResultReg);
1624
1625 ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, X86::sub_16bit);
1626 } else if (DstVT != MVT::i8) {
1627 ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
1628 ResultReg);
1629 if (ResultReg == 0)
1630 return false;
1631 }
1632
1633 updateValueMap(I, ResultReg);
1634 return true;
1635}
1636
1637bool X86FastISel::X86SelectBranch(const Instruction *I) {
1638 // Unconditional branches are selected by tablegen-generated code.
1639 // Handle a conditional branch.
1640 const BranchInst *BI = cast<BranchInst>(I);
1641 MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1642 MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1643
1644 // Fold the common case of a conditional branch with a comparison
1645 // in the same block (values defined on other blocks may not have
1646 // initialized registers).
1647 X86::CondCode CC;
1648 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1649 if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
1650 EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1651
1652 // Try to optimize or fold the cmp.
1653 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1654 switch (Predicate) {
1655 default: break;
1656 case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, MIMD.getDL()); return true;
1657 case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, MIMD.getDL()); return true;
1658 }
1659
1660 const Value *CmpLHS = CI->getOperand(0);
1661 const Value *CmpRHS = CI->getOperand(1);
1662
1663 // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
1664 // 0.0.
1665 // We don't have to materialize a zero constant for this case and can just
1666 // use %x again on the RHS.
1667 if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1668 const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1669 if (CmpRHSC && CmpRHSC->isNullValue())
1670 CmpRHS = CmpLHS;
1671 }
1672
1673 // Try to take advantage of fallthrough opportunities.
1674 if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1675 std::swap(TrueMBB, FalseMBB);
1676 Predicate = CmpInst::getInversePredicate(Predicate);
1677 }
1678
1679 // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
1680 // code check. Instead two branch instructions are required to check all
1681 // the flags. First we change the predicate to a supported condition code,
1682 // which will be the first branch. Later on we will emit the second
1683 // branch.
1684 bool NeedExtraBranch = false;
1685 switch (Predicate) {
1686 default: break;
1687 case CmpInst::FCMP_OEQ:
1688 std::swap(TrueMBB, FalseMBB);
1689 [[fallthrough]];
1690 case CmpInst::FCMP_UNE:
1691 NeedExtraBranch = true;
1692 Predicate = CmpInst::FCMP_UNE;
1693 break;
1694 }
1695
1696 bool SwapArgs;
1697 std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1698 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1699
1700 if (SwapArgs)
1701 std::swap(CmpLHS, CmpRHS);
1702
1703 // Emit a compare of the LHS and RHS, setting the flags.
1704 if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
1705 return false;
1706
1707 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
1708 .addMBB(TrueMBB).addImm(CC);
1709
1710 // X86 requires a second branch to handle UNE (and OEQ, which is mapped
1711 // to UNE above).
1712 if (NeedExtraBranch) {
1713 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
1714 .addMBB(TrueMBB).addImm(X86::COND_P);
1715 }
1716
1717 finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1718 return true;
1719 }
1720 } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1721 // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1722 // typically happen for _Bool and C++ bools.
1723 MVT SourceVT;
1724 if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1725 isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1726 unsigned TestOpc = 0;
1727 switch (SourceVT.SimpleTy) {
1728 default: break;
1729 case MVT::i8: TestOpc = X86::TEST8ri; break;
1730 case MVT::i16: TestOpc = X86::TEST16ri; break;
1731 case MVT::i32: TestOpc = X86::TEST32ri; break;
1732 case MVT::i64: TestOpc = X86::TEST64ri32; break;
1733 }
1734 if (TestOpc) {
1735 Register OpReg = getRegForValue(TI->getOperand(0));
1736 if (OpReg == 0) return false;
1737
1738 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TestOpc))
1739 .addReg(OpReg).addImm(1);
1740
1741 unsigned JmpCond = X86::COND_NE;
1742 if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1743 std::swap(TrueMBB, FalseMBB);
1744 JmpCond = X86::COND_E;
1745 }
1746
1747 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
1748 .addMBB(TrueMBB).addImm(JmpCond);
1749
1750 finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1751 return true;
1752 }
1753 }
1754 } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
1755 // Fake request the condition, otherwise the intrinsic might be completely
1756 // optimized away.
1757 Register TmpReg = getRegForValue(BI->getCondition());
1758 if (TmpReg == 0)
1759 return false;
1760
1761 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
1762 .addMBB(TrueMBB).addImm(CC);
1763 finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1764 return true;
1765 }
1766
1767 // Otherwise do a clumsy setcc and re-test it.
1768 // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1769 // in an explicit cast, so make sure to handle that correctly.
1770 Register OpReg = getRegForValue(BI->getCondition());
1771 if (OpReg == 0) return false;
1772
1773 // In case OpReg is a K register, COPY to a GPR
1774 if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1775 unsigned KOpReg = OpReg;
1776 OpReg = createResultReg(&X86::GR32RegClass);
1777 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1778 TII.get(TargetOpcode::COPY), OpReg)
1779 .addReg(KOpReg);
1780 OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, X86::sub_8bit);
1781 }
1782 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TEST8ri))
1783 .addReg(OpReg)
1784 .addImm(1);
1785 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::JCC_1))
1786 .addMBB(TrueMBB).addImm(X86::COND_NE);
1787 finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
1788 return true;
1789}
1790
1791bool X86FastISel::X86SelectShift(const Instruction *I) {
1792 unsigned CReg = 0, OpReg = 0;
1793 const TargetRegisterClass *RC = nullptr;
1794 if (I->getType()->isIntegerTy(8)) {
1795 CReg = X86::CL;
1796 RC = &X86::GR8RegClass;
1797 switch (I->getOpcode()) {
1798 case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1799 case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1800 case Instruction::Shl: OpReg = X86::SHL8rCL; break;
1801 default: return false;
1802 }
1803 } else if (I->getType()->isIntegerTy(16)) {
1804 CReg = X86::CX;
1805 RC = &X86::GR16RegClass;
1806 switch (I->getOpcode()) {
1807 default: llvm_unreachable("Unexpected shift opcode");
1808 case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1809 case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1810 case Instruction::Shl: OpReg = X86::SHL16rCL; break;
1811 }
1812 } else if (I->getType()->isIntegerTy(32)) {
1813 CReg = X86::ECX;
1814 RC = &X86::GR32RegClass;
1815 switch (I->getOpcode()) {
1816 default: llvm_unreachable("Unexpected shift opcode");
1817 case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1818 case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1819 case Instruction::Shl: OpReg = X86::SHL32rCL; break;
1820 }
1821 } else if (I->getType()->isIntegerTy(64)) {
1822 CReg = X86::RCX;
1823 RC = &X86::GR64RegClass;
1824 switch (I->getOpcode()) {
1825 default: llvm_unreachable("Unexpected shift opcode");
1826 case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1827 case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1828 case Instruction::Shl: OpReg = X86::SHL64rCL; break;
1829 }
1830 } else {
1831 return false;
1832 }
1833
1834 MVT VT;
1835 if (!isTypeLegal(I->getType(), VT))
1836 return false;
1837
1838 Register Op0Reg = getRegForValue(I->getOperand(0));
1839 if (Op0Reg == 0) return false;
1840
1841 Register Op1Reg = getRegForValue(I->getOperand(1));
1842 if (Op1Reg == 0) return false;
1843 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
1844 CReg).addReg(Op1Reg);
1845
1846 // The shift instruction uses X86::CL. If we defined a super-register
1847 // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
1848 if (CReg != X86::CL)
1849 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1850 TII.get(TargetOpcode::KILL), X86::CL)
1851 .addReg(CReg, RegState::Kill);
1852
1853 Register ResultReg = createResultReg(RC);
1854 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(OpReg), ResultReg)
1855 .addReg(Op0Reg);
1856 updateValueMap(I, ResultReg);
1857 return true;
1858}
1859
1860bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1861 const static unsigned NumTypes = 4; // i8, i16, i32, i64
1862 const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1863 const static bool S = true; // IsSigned
1864 const static bool U = false; // !IsSigned
1865 const static unsigned Copy = TargetOpcode::COPY;
1866 // For the X86 DIV/IDIV instruction, in most cases the dividend
1867 // (numerator) must be in a specific register pair highreg:lowreg,
1868 // producing the quotient in lowreg and the remainder in highreg.
1869 // For most data types, to set up the instruction, the dividend is
1870 // copied into lowreg, and lowreg is sign-extended or zero-extended
1871 // into highreg. The exception is i8, where the dividend is defined
1872 // as a single register rather than a register pair, and we
1873 // therefore directly sign-extend or zero-extend the dividend into
1874 // lowreg, instead of copying, and ignore the highreg.
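  // For example, "sdiv i32 %a, %b" is emitted roughly as:
  //   COPY %a -> EAX
  //   CDQ                ; sign-extend EAX into EDX
  //   IDIV32r %b         ; quotient in EAX, remainder in EDX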
1875 const static struct DivRemEntry {
1876 // The following portion depends only on the data type.
1877 const TargetRegisterClass *RC;
1878 unsigned LowInReg; // low part of the register pair
1879 unsigned HighInReg; // high part of the register pair
1880 // The following portion depends on both the data type and the operation.
1881 struct DivRemResult {
1882 unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1883 unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1884 // highreg, or copying a zero into highreg.
1885 unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1886 // zero/sign-extending into lowreg for i8.
1887 unsigned DivRemResultReg; // Register containing the desired result.
1888 bool IsOpSigned; // Whether to use signed or unsigned form.
1889 } ResultTable[NumOps];
1890 } OpTable[NumTypes] = {
1891 { &X86::GR8RegClass, X86::AX, 0, {
1892 { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
1893 { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
1894 { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
1895 { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
1896 }
1897 }, // i8
1898 { &X86::GR16RegClass, X86::AX, X86::DX, {
1899 { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
1900 { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
1901 { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
1902 { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
1903 }
1904 }, // i16
1905 { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1906 { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
1907 { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
1908 { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
1909 { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
1910 }
1911 }, // i32
1912 { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1913 { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
1914 { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
1915 { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
1916 { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
1917 }
1918 }, // i64
1919 };
1920
1921 MVT VT;
1922 if (!isTypeLegal(I->getType(), VT))
1923 return false;
1924
1925 unsigned TypeIndex, OpIndex;
1926 switch (VT.SimpleTy) {
1927 default: return false;
1928 case MVT::i8: TypeIndex = 0; break;
1929 case MVT::i16: TypeIndex = 1; break;
1930 case MVT::i32: TypeIndex = 2; break;
1931 case MVT::i64: TypeIndex = 3;
1932 if (!Subtarget->is64Bit())
1933 return false;
1934 break;
1935 }
1936
1937 switch (I->getOpcode()) {
1938 default: llvm_unreachable("Unexpected div/rem opcode");
1939 case Instruction::SDiv: OpIndex = 0; break;
1940 case Instruction::SRem: OpIndex = 1; break;
1941 case Instruction::UDiv: OpIndex = 2; break;
1942 case Instruction::URem: OpIndex = 3; break;
1943 }
1944
1945 const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1946 const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1947 Register Op0Reg = getRegForValue(I->getOperand(0));
1948 if (Op0Reg == 0)
1949 return false;
1950 Register Op1Reg = getRegForValue(I->getOperand(1));
1951 if (Op1Reg == 0)
1952 return false;
1953
1954 // Move op0 into low-order input register.
1955 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1956 TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1957 // Zero-extend or sign-extend into high-order input register.
1958 if (OpEntry.OpSignExtend) {
1959 if (OpEntry.IsOpSigned)
1960 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1961 TII.get(OpEntry.OpSignExtend));
1962 else {
1963 Register Zero32 = createResultReg(&X86::GR32RegClass);
1964 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1965 TII.get(X86::MOV32r0), Zero32);
1966
1967 // Copy the zero into the appropriate sub/super/identical physical
1968 // register. Unfortunately the operations needed are not uniform enough
1969 // to fit neatly into the table above.
1970 if (VT == MVT::i16) {
1971 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1972 TII.get(Copy), TypeEntry.HighInReg)
1973 .addReg(Zero32, 0, X86::sub_16bit);
1974 } else if (VT == MVT::i32) {
1975 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1976 TII.get(Copy), TypeEntry.HighInReg)
1977 .addReg(Zero32);
1978 } else if (VT == MVT::i64) {
1979 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1980 TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1981 .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1982 }
1983 }
1984 }
1985 // Generate the DIV/IDIV instruction.
1986 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1987 TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1988 // For i8 remainder, we can't reference ah directly, as we'll end
1989 // up with bogus copies like %r9b = COPY %ah. Reference ax
1990 // instead to prevent ah references in a rex instruction.
1991 //
1992 // The current assumption of the fast register allocator is that isel
1993 // won't generate explicit references to the GR8_NOREX registers. If
1994 // the allocator and/or the backend get enhanced to be more robust in
1995 // that regard, this can be, and should be, removed.
1996 unsigned ResultReg = 0;
1997 if ((I->getOpcode() == Instruction::SRem ||
1998 I->getOpcode() == Instruction::URem) &&
1999 OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
2000 Register SourceSuperReg = createResultReg(&X86::GR16RegClass);
2001 Register ResultSuperReg = createResultReg(&X86::GR16RegClass);
2002 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2003 TII.get(Copy), SourceSuperReg).addReg(X86::AX);
2004
2005 // Shift AX right by 8 bits instead of using AH.
2006 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SHR16ri),
2007 ResultSuperReg).addReg(SourceSuperReg).addImm(8);
2008
2009 // Now reference the 8-bit subreg of the result.
2010 ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
2011 X86::sub_8bit);
2012 }
2013 // Copy the result out of the physreg if we haven't already.
2014 if (!ResultReg) {
2015 ResultReg = createResultReg(TypeEntry.RC);
2016 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Copy), ResultReg)
2017 .addReg(OpEntry.DivRemResultReg);
2018 }
2019 updateValueMap(I, ResultReg);
2020
2021 return true;
2022}
2023
2024/// Emit a conditional move instruction (if they are supported) to lower
2025/// the select.
2026bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
2027 // Check if the subtarget supports these instructions.
2028 if (!Subtarget->canUseCMOV())
2029 return false;
2030
2031 // FIXME: Add support for i8.
2032 if (RetVT < MVT::i16 || RetVT > MVT::i64)
2033 return false;
2034
2035 const Value *Cond = I->getOperand(0);
2036 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2037 bool NeedTest = true;
2038 X86::CondCode CC = X86::COND_NE;
2039
2040 // Optimize conditions coming from a compare if both instructions are in the
2041 // same basic block (values defined in other basic blocks may not have
2042 // initialized registers).
2043 const auto *CI = dyn_cast<CmpInst>(Cond);
2044 if (CI && (CI->getParent() == I->getParent())) {
2045 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2046
2047 // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
2048 static const uint16_t SETFOpcTable[2][3] = {
2049 { X86::COND_NP, X86::COND_E, X86::TEST8rr },
2050 { X86::COND_P, X86::COND_NE, X86::OR8rr }
2051 };
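    // For example, FCMP_OEQ is handled below roughly as:
    //   setnp r1; sete r2; test r2, r1
    // so the CMOV emitted at the end of this function picks the 'true'
    // operand exactly when the operands compared ordered and equal.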
2052 const uint16_t *SETFOpc = nullptr;
2053 switch (Predicate) {
2054 default: break;
2055 case CmpInst::FCMP_OEQ:
2056 SETFOpc = &SETFOpcTable[0][0];
2057 Predicate = CmpInst::ICMP_NE;
2058 break;
2059 case CmpInst::FCMP_UNE:
2060 SETFOpc = &SETFOpcTable[1][0];
2061 Predicate = CmpInst::ICMP_NE;
2062 break;
2063 }
2064
2065 bool NeedSwap;
2066 std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
2067 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
2068
2069 const Value *CmpLHS = CI->getOperand(0);
2070 const Value *CmpRHS = CI->getOperand(1);
2071 if (NeedSwap)
2072 std::swap(CmpLHS, CmpRHS);
2073
2074 EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2075 // Emit a compare of the LHS and RHS, setting the flags.
2076 if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2077 return false;
2078
2079 if (SETFOpc) {
2080 Register FlagReg1 = createResultReg(&X86::GR8RegClass);
2081 Register FlagReg2 = createResultReg(&X86::GR8RegClass);
2082 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
2083 FlagReg1).addImm(SETFOpc[0]);
2084 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
2085 FlagReg2).addImm(SETFOpc[1]);
2086 auto const &II = TII.get(SETFOpc[2]);
2087 if (II.getNumDefs()) {
2088 Register TmpReg = createResultReg(&X86::GR8RegClass);
2089 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, TmpReg)
2090 .addReg(FlagReg2).addReg(FlagReg1);
2091 } else {
2092 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2093 .addReg(FlagReg2).addReg(FlagReg1);
2094 }
2095 }
2096 NeedTest = false;
2097 } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
2098 // Fake request the condition, otherwise the intrinsic might be completely
2099 // optimized away.
2100 Register TmpReg = getRegForValue(Cond);
2101 if (TmpReg == 0)
2102 return false;
2103
2104 NeedTest = false;
2105 }
2106
2107 if (NeedTest) {
2108 // Selects operate on i1, but CondReg is 8 bits wide and may contain
2109 // garbage: only the least significant bit is guaranteed to be accurate.
2110 // If we read more than the lsb, we may see a non-zero value even though
2111 // the lsb is zero. Therefore, truncate CondReg to i1 for the select.
2112 // This is achieved by performing a TEST against 1.
2113 Register CondReg = getRegForValue(Cond);
2114 if (CondReg == 0)
2115 return false;
2116
2117 // In case OpReg is a K register, COPY to a GPR
2118 if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2119 unsigned KCondReg = CondReg;
2120 CondReg = createResultReg(&X86::GR32RegClass);
2121 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2122 TII.get(TargetOpcode::COPY), CondReg)
2123 .addReg(KCondReg);
2124 CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);
2125 }
2126 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TEST8ri))
2127 .addReg(CondReg)
2128 .addImm(1);
2129 }
2130
2131 const Value *LHS = I->getOperand(1);
2132 const Value *RHS = I->getOperand(2);
2133
2134 Register RHSReg = getRegForValue(RHS);
2135 Register LHSReg = getRegForValue(LHS);
2136 if (!LHSReg || !RHSReg)
2137 return false;
2138
2139 const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
2140 unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(*RC)/8);
2141 Register ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
2142 updateValueMap(I, ResultReg);
2143 return true;
2144}
2145
2146/// Emit SSE or AVX instructions to lower the select.
2147///
2148/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
2149/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
2150/// SSE instructions are available. If AVX is available, try to use a VBLENDV.
2151bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
2152 // Optimize conditions coming from a compare if both instructions are in the
2153 // same basic block (values defined in other basic blocks may not have
2154 // initialized registers).
2155 const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
2156 if (!CI || (CI->getParent() != I->getParent()))
2157 return false;
2158
2159 if (I->getType() != CI->getOperand(0)->getType() ||
2160 !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
2161 (Subtarget->hasSSE2() && RetVT == MVT::f64)))
2162 return false;
2163
2164 const Value *CmpLHS = CI->getOperand(0);
2165 const Value *CmpRHS = CI->getOperand(1);
2166 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2167
2168 // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
2169 // We don't have to materialize a zero constant for this case and can just use
2170 // %x again on the RHS.
2171 if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
2172 const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
2173 if (CmpRHSC && CmpRHSC->isNullValue())
2174 CmpRHS = CmpLHS;
2175 }
2176
2177 unsigned CC;
2178 bool NeedSwap;
2179 std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
2180 if (CC > 7 && !Subtarget->hasAVX())
2181 return false;
2182
2183 if (NeedSwap)
2184 std::swap(CmpLHS, CmpRHS);
2185
2186 const Value *LHS = I->getOperand(1);
2187 const Value *RHS = I->getOperand(2);
2188
2189 Register LHSReg = getRegForValue(LHS);
2190 Register RHSReg = getRegForValue(RHS);
2191 Register CmpLHSReg = getRegForValue(CmpLHS);
2192 Register CmpRHSReg = getRegForValue(CmpRHS);
2193 if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
2194 return false;
2195
2196 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2197 unsigned ResultReg;
2198
2199 if (Subtarget->hasAVX512()) {
2200 // If we have AVX512 we can use a mask compare and masked movss/sd.
2201 const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
2202 const TargetRegisterClass *VK1 = &X86::VK1RegClass;
2203
2204 unsigned CmpOpcode =
2205 (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
2206 Register CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpRHSReg,
2207 CC);
2208
2209 // Need an IMPLICIT_DEF for the input that is used to generate the upper
2210 // bits of the result register since it's not based on any of the inputs.
2211 Register ImplicitDefReg = createResultReg(VR128X);
2212 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2213 TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2214
2215 // Place RHSReg in the passthru of the masked movss/sd operation and put
2216 // LHSReg in the input. The mask input comes from the compare.
2217 unsigned MovOpcode =
2218 (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
2219 unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, CmpReg,
2220 ImplicitDefReg, LHSReg);
2221
2222 ResultReg = createResultReg(RC);
2223 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2224 TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
2225
2226 } else if (Subtarget->hasAVX()) {
2227 const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2228
2229 // If we have AVX, create 1 blendv instead of 3 logic instructions.
2230 // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
2231 // uses XMM0 as the selection register. That may need just as many
2232 // instructions as the AND/ANDN/OR sequence due to register moves, so
2233 // don't bother.
2234 unsigned CmpOpcode =
2235 (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
2236 unsigned BlendOpcode =
2237 (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
2238
2239 Register CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpRHSReg,
2240 CC);
2241 Register VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, LHSReg,
2242 CmpReg);
2243 ResultReg = createResultReg(RC);
2244 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2245 TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
2246 } else {
2247 // Choose the SSE instruction sequence based on data type (float or double).
2248 static const uint16_t OpcTable[2][4] = {
2249 { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
2250 { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
2251 };
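    // The scalar compare yields an all-ones/all-zeros mask, so the select is
    // computed branchlessly as roughly:
    //   result = (mask & TrueVal) | (~mask & FalseVal)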
2252
2253 const uint16_t *Opc = nullptr;
2254 switch (RetVT.SimpleTy) {
2255 default: return false;
2256 case MVT::f32: Opc = &OpcTable[0][0]; break;
2257 case MVT::f64: Opc = &OpcTable[1][0]; break;
2258 }
2259
2260 const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2261 Register CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpRHSReg, CC);
2262 Register AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, LHSReg);
2263 Register AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, RHSReg);
2264 Register OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, AndReg);
2265 ResultReg = createResultReg(RC);
2266 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2267 TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
2268 }
2269 updateValueMap(I, ResultReg);
2270 return true;
2271}
2272
2273bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
2274 // These are pseudo CMOV instructions and will be later expanded into control-
2275 // flow.
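  // (Each CMOV_* pseudo is later expanded by the target into a small diamond
  // of basic blocks with a real conditional branch and a PHI.)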
2276 unsigned Opc;
2277 switch (RetVT.SimpleTy) {
2278 default: return false;
2279 case MVT::i8: Opc = X86::CMOV_GR8; break;
2280 case MVT::i16: Opc = X86::CMOV_GR16; break;
2281 case MVT::i32: Opc = X86::CMOV_GR32; break;
2282 case MVT::f16:
2283 Opc = Subtarget->hasAVX512() ? X86::CMOV_FR16X : X86::CMOV_FR16; break;
2284 case MVT::f32:
2285 Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X : X86::CMOV_FR32; break;
2286 case MVT::f64:
2287 Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X : X86::CMOV_FR64; break;
2288 }
2289
2290 const Value *Cond = I->getOperand(0);
2291 X86::CondCode CC = X86::COND_NE;
2292
2293 // Optimize conditions coming from a compare if both instructions are in the
2294 // same basic block (values defined in other basic blocks may not have
2295 // initialized registers).
2296 const auto *CI = dyn_cast<CmpInst>(Cond);
2297 if (CI && (CI->getParent() == I->getParent())) {
2298 bool NeedSwap;
2299 std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
2300 if (CC > X86::LAST_VALID_COND)
2301 return false;
2302
2303 const Value *CmpLHS = CI->getOperand(0);
2304 const Value *CmpRHS = CI->getOperand(1);
2305
2306 if (NeedSwap)
2307 std::swap(CmpLHS, CmpRHS);
2308
2309 EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
2310 if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
2311 return false;
2312 } else {
2313 Register CondReg = getRegForValue(Cond);
2314 if (CondReg == 0)
2315 return false;
2316
2317 // In case OpReg is a K register, COPY to a GPR
2318 if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2319 unsigned KCondReg = CondReg;
2320 CondReg = createResultReg(&X86::GR32RegClass);
2321 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2322 TII.get(TargetOpcode::COPY), CondReg)
2323 .addReg(KCondReg);
2324 CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, X86::sub_8bit);
2325 }
2326 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TEST8ri))
2327 .addReg(CondReg)
2328 .addImm(1);
2329 }
2330
2331 const Value *LHS = I->getOperand(1);
2332 const Value *RHS = I->getOperand(2);
2333
2334 Register LHSReg = getRegForValue(LHS);
2335 Register RHSReg = getRegForValue(RHS);
2336 if (!LHSReg || !RHSReg)
2337 return false;
2338
2339 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2340
2341 Register ResultReg =
2342 fastEmitInst_rri(Opc, RC, RHSReg, LHSReg, CC);
2343 updateValueMap(I, ResultReg);
2344 return true;
2345}
2346
2347bool X86FastISel::X86SelectSelect(const Instruction *I) {
2348 MVT RetVT;
2349 if (!isTypeLegal(I->getType(), RetVT))
2350 return false;
2351
2352 // Check if we can fold the select.
2353 if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
2354 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2355 const Value *Opnd = nullptr;
2356 switch (Predicate) {
2357 default: break;
2358 case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
2359 case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
2360 }
2361 // No need for a select anymore - this is an unconditional move.
2362 if (Opnd) {
2363 Register OpReg = getRegForValue(Opnd);
2364 if (OpReg == 0)
2365 return false;
2366 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2367 Register ResultReg = createResultReg(RC);
2368 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2369 TII.get(TargetOpcode::COPY), ResultReg)
2370 .addReg(OpReg);
2371 updateValueMap(I, ResultReg);
2372 return true;
2373 }
2374 }
2375
2376 // First try to use real conditional move instructions.
2377 if (X86FastEmitCMoveSelect(RetVT, I))
2378 return true;
2379
2380 // Try to use a sequence of SSE instructions to simulate a conditional move.
2381 if (X86FastEmitSSESelect(RetVT, I))
2382 return true;
2383
2384 // Fall-back to pseudo conditional move instructions, which will be later
2385 // converted to control-flow.
2386 if (X86FastEmitPseudoSelect(RetVT, I))
2387 return true;
2388
2389 return false;
2390}
2391
2392// Common code for X86SelectSIToFP and X86SelectUIToFP.
2393bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
2394 // The target-independent selection algorithm in FastISel already knows how
2395 // to select a SINT_TO_FP if the target is SSE but not AVX.
2396 // Early exit if the subtarget doesn't have AVX.
2397 // Unsigned conversion requires avx512.
2398 bool HasAVX512 = Subtarget->hasAVX512();
2399 if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
2400 return false;
2401
2402 // TODO: We could sign extend narrower types.
2403 MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
2404 if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
2405 return false;
2406
2407 // Select integer to float/double conversion.
2408 Register OpReg = getRegForValue(I->getOperand(0));
2409 if (OpReg == 0)
2410 return false;
2411
2412 unsigned Opcode;
2413
2414 static const uint16_t SCvtOpc[2][2][2] = {
2415 { { X86::VCVTSI2SSrr, X86::VCVTSI642SSrr },
2416 { X86::VCVTSI2SDrr, X86::VCVTSI642SDrr } },
2417 { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
2418 { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
2419 };
2420 static const uint16_t UCvtOpc[2][2] = {
2421 { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
2422 { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
2423 };
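  // The tables are indexed as SCvtOpc[HasAVX512][IsDouble][Is64Bit] and
  // UCvtOpc[IsDouble][Is64Bit]; see the opcode selection below.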
2424 bool Is64Bit = SrcVT == MVT::i64;
2425
2426 if (I->getType()->isDoubleTy()) {
2427 // s/uitofp int -> double
2428 Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
2429 } else if (I->getType()->isFloatTy()) {
2430 // s/uitofp int -> float
2431 Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
2432 } else
2433 return false;
2434
2435 MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
2436 const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
2437 Register ImplicitDefReg = createResultReg(RC);
2438 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2439 TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2440 Register ResultReg = fastEmitInst_rr(Opcode, RC, ImplicitDefReg, OpReg);
2441 updateValueMap(I, ResultReg);
2442 return true;
2443}
2444
2445bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
2446 return X86SelectIntToFP(I, /*IsSigned*/true);
2447}
2448
2449bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
2450 return X86SelectIntToFP(I, /*IsSigned*/false);
2451}
2452
2453// Helper method used by X86SelectFPExt and X86SelectFPTrunc.
2454bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
2455 unsigned TargetOpc,
2456 const TargetRegisterClass *RC) {
2457 assert((I->getOpcode() == Instruction::FPExt ||
2458 I->getOpcode() == Instruction::FPTrunc) &&
2459 "Instruction must be an FPExt or FPTrunc!");
2460 bool HasAVX = Subtarget->hasAVX();
2461
2462 Register OpReg = getRegForValue(I->getOperand(0));
2463 if (OpReg == 0)
2464 return false;
2465
2466 unsigned ImplicitDefReg;
2467 if (HasAVX) {
2468 ImplicitDefReg = createResultReg(RC);
2469 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2470 TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2471
2472 }
2473
2474 Register ResultReg = createResultReg(RC);
2475 MachineInstrBuilder MIB;
2476 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpc),
2477 ResultReg);
2478
2479 if (HasAVX)
2480 MIB.addReg(ImplicitDefReg);
2481
2482 MIB.addReg(OpReg);
2483 updateValueMap(I, ResultReg);
2484 return true;
2485}
2486
2487bool X86FastISel::X86SelectFPExt(const Instruction *I) {
2488 if (Subtarget->hasSSE2() && I->getType()->isDoubleTy() &&
2489 I->getOperand(0)->getType()->isFloatTy()) {
2490 bool HasAVX512 = Subtarget->hasAVX512();
2491 // fpext from float to double.
2492 unsigned Opc =
2493 HasAVX512 ? X86::VCVTSS2SDZrr
2494 : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
2495 return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f64));
2496 }
2497
2498 return false;
2499}
2500
2501bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
2502 if (Subtarget->hasSSE2() && I->getType()->isFloatTy() &&
2503 I->getOperand(0)->getType()->isDoubleTy()) {
2504 bool HasAVX512 = Subtarget->hasAVX512();
2505 // fptrunc from double to float.
2506 unsigned Opc =
2507 HasAVX512 ? X86::VCVTSD2SSZrr
2508 : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
2509 return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f32));
2510 }
2511
2512 return false;
2513}
2514
2515bool X86FastISel::X86SelectTrunc(const Instruction *I) {
2516 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
2517 EVT DstVT = TLI.getValueType(DL, I->getType());
2518
2519 // This code only handles truncation to byte.
2520 if (DstVT != MVT::i8 && DstVT != MVT::i1)
2521 return false;
2522 if (!TLI.isTypeLegal(SrcVT))
2523 return false;
2524
2525 Register InputReg = getRegForValue(I->getOperand(0));
2526 if (!InputReg)
2527 // Unhandled operand. Halt "fast" selection and bail.
2528 return false;
2529
2530 if (SrcVT == MVT::i8) {
2531 // Truncate from i8 to i1; no code needed.
2532 updateValueMap(I, InputReg);
2533 return true;
2534 }
2535
2536 // Issue an extract_subreg.
2537 Register ResultReg = fastEmitInst_extractsubreg(MVT::i8, InputReg,
2538 X86::sub_8bit);
2539 if (!ResultReg)
2540 return false;
2541
2542 updateValueMap(I, ResultReg);
2543 return true;
2544}
2545
2546bool X86FastISel::IsMemcpySmall(uint64_t Len) {
2547 return Len <= (Subtarget->is64Bit() ? 32 : 16);
2548}
2549
2550bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
2551 X86AddressMode SrcAM, uint64_t Len) {
2552
2553 // Make sure we don't bloat code by inlining very large memcpy's.
2554 if (!IsMemcpySmall(Len))
2555 return false;
2556
2557 bool i64Legal = Subtarget->is64Bit();
2558
2559 // We don't care about alignment here since we just emit integer accesses.
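  // For example, a 15-byte copy on x86-64 is emitted as one i64, one i32,
  // one i16 and one i8 load/store pair.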
2560 while (Len) {
2561 MVT VT;
2562 if (Len >= 8 && i64Legal)
2563 VT = MVT::i64;
2564 else if (Len >= 4)
2565 VT = MVT::i32;
2566 else if (Len >= 2)
2567 VT = MVT::i16;
2568 else
2569 VT = MVT::i8;
2570
2571 unsigned Reg;
2572 bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
2573 RV &= X86FastEmitStore(VT, Reg, DestAM);
2574 assert(RV && "Failed to emit load or store??");
2575 (void)RV;
2576
2577 unsigned Size = VT.getSizeInBits()/8;
2578 Len -= Size;
2579 DestAM.Disp += Size;
2580 SrcAM.Disp += Size;
2581 }
2582
2583 return true;
2584}
2585
2586bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
2587 // FIXME: Handle more intrinsics.
2588 switch (II->getIntrinsicID()) {
2589 default: return false;
2590 case Intrinsic::convert_from_fp16:
2591 case Intrinsic::convert_to_fp16: {
2592 if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
2593 return false;
2594
2595 const Value *Op = II->getArgOperand(0);
2596 Register InputReg = getRegForValue(Op);
2597 if (InputReg == 0)
2598 return false;
2599
2600 // F16C only allows converting from float to half and from half to float.
2601 bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
2602 if (IsFloatToHalf) {
2603 if (!Op->getType()->isFloatTy())
2604 return false;
2605 } else {
2606 if (!II->getType()->isFloatTy())
2607 return false;
2608 }
2609
2610 unsigned ResultReg = 0;
2611 const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
2612 if (IsFloatToHalf) {
2613 // 'InputReg' is implicitly promoted from register class FR32 to
2614 // register class VR128 by method 'constrainOperandRegClass' which is
2615 // directly called by 'fastEmitInst_ri'.
2616 // Instruction VCVTPS2PHrr takes an extra immediate operand which is
2617 // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
2618 // It's consistent with the other FP instructions, which are usually
2619 // controlled by MXCSR.
2620 unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPS2PHZ128rr
2621 : X86::VCVTPS2PHrr;
2622 InputReg = fastEmitInst_ri(Opc, RC, InputReg, 4);
2623
2624 // Move the lower 32-bits of ResultReg to another register of class GR32.
2625 Opc = Subtarget->hasAVX512() ? X86::VMOVPDI2DIZrr
2626 : X86::VMOVPDI2DIrr;
2627 ResultReg = createResultReg(&X86::GR32RegClass);
2628 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2629 .addReg(InputReg, RegState::Kill);
2630
2631 // The result value is in the lower 16-bits of ResultReg.
2632 unsigned RegIdx = X86::sub_16bit;
2633 ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, RegIdx);
2634 } else {
2635 assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
2636 // Explicitly zero-extend the input to 32-bit.
2637 InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::ZERO_EXTEND, InputReg);
2638
2639 // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
2640 InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
2641 InputReg);
2642
2643 unsigned Opc = Subtarget->hasVLX() ? X86::VCVTPH2PSZ128rr
2644 : X86::VCVTPH2PSrr;
2645 InputReg = fastEmitInst_r(Opc, RC, InputReg);
2646
2647 // The result value is in the lower 32-bits of ResultReg.
2648 // Emit an explicit copy from register class VR128 to register class FR32.
2649 ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
2650 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2651 TII.get(TargetOpcode::COPY), ResultReg)
2652 .addReg(InputReg, RegState::Kill);
2653 }
2654
2655 updateValueMap(II, ResultReg);
2656 return true;
2657 }
2658 case Intrinsic::frameaddress: {
2659 MachineFunction *MF = FuncInfo.MF;
2660 if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
2661 return false;
2662
2663 Type *RetTy = II->getCalledFunction()->getReturnType();
2664
2665 MVT VT;
2666 if (!isTypeLegal(RetTy, VT))
2667 return false;
2668
2669 unsigned Opc;
2670 const TargetRegisterClass *RC = nullptr;
2671
2672 switch (VT.SimpleTy) {
2673 default: llvm_unreachable("Invalid result type for frameaddress.");
2674 case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
2675 case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
2676 }
2677
2678 // This needs to be set before we call getPtrSizedFrameRegister, otherwise
2679 // we get the wrong frame register.
2680 MachineFrameInfo &MFI = MF->getFrameInfo();
2681 MFI.setFrameAddressIsTaken(true);
2682
2683 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2684 unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
2685 assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
2686 (FrameReg == X86::EBP && VT == MVT::i32)) &&
2687 "Invalid Frame Register!");
2688
2689 // Always make a copy of the frame register to a vreg first, so that we
2690 // never directly reference the frame register (the TwoAddressInstruction-
2691 // Pass doesn't like that).
2692 Register SrcReg = createResultReg(RC);
2693 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2694 TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
2695
2696 // Now recursively load from the frame address.
2697 // movq (%rbp), %rax
2698 // movq (%rax), %rax
2699 // movq (%rax), %rax
2700 // ...
2701 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2702 while (Depth--) {
2703 Register DestReg = createResultReg(RC);
2704 addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2705 TII.get(Opc), DestReg), SrcReg);
2706 SrcReg = DestReg;
2707 }
2708
2709 updateValueMap(II, SrcReg);
2710 return true;
2711 }
2712 case Intrinsic::memcpy: {
2713 const MemCpyInst *MCI = cast<MemCpyInst>(II);
2714 // Don't handle volatile or variable length memcpys.
2715 if (MCI->isVolatile())
2716 return false;
2717
2718 if (isa<ConstantInt>(MCI->getLength())) {
2719 // Small memcpy's are common enough that we want to do them
2720 // without a call if possible.
2721 uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
2722 if (IsMemcpySmall(Len)) {
2723 X86AddressMode DestAM, SrcAM;
2724 if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
2725 !X86SelectAddress(MCI->getRawSource(), SrcAM))
2726 return false;
2727 TryEmitSmallMemcpy(DestAM, SrcAM, Len);
2728 return true;
2729 }
2730 }
2731
2732 unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2733 if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
2734 return false;
2735
2736 if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
2737 return false;
2738
2739 return lowerCallTo(II, "memcpy", II->arg_size() - 1);
2740 }
2741 case Intrinsic::memset: {
2742 const MemSetInst *MSI = cast<MemSetInst>(II);
2743
2744 if (MSI->isVolatile())
2745 return false;
2746
2747 unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
2748 if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
2749 return false;
2750
2751 if (MSI->getDestAddressSpace() > 255)
2752 return false;
2753
2754 return lowerCallTo(II, "memset", II->arg_size() - 1);
2755 }
2756 case Intrinsic::stackprotector: {
2757 // Emit code to store the stack guard onto the stack.
2758 EVT PtrTy = TLI.getPointerTy(DL);
2759
2760 const Value *Op1 = II->getArgOperand(0); // The guard's value.
2761 const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
2762
2763 MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
2764
2765 // Grab the frame index.
2766 X86AddressMode AM;
2767 if (!X86SelectAddress(Slot, AM)) return false;
2768 if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
2769 return true;
2770 }
2771 case Intrinsic::dbg_declare: {
2772 const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
2773 X86AddressMode AM;
2774 assert(DI->getAddress() && "Null address should be checked earlier!");
2775 if (!X86SelectAddress(DI->getAddress(), AM))
2776 return false;
2777 const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
2778 assert(DI->getVariable()->isValidLocationForIntrinsic(MIMD.getDL()) &&
2779 "Expected inlined-at fields to agree");
2780 addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II), AM)
2781 .addImm(0)
2782 .addMetadata(DI->getVariable())
2783 .addMetadata(DI->getExpression());
2784 return true;
2785 }
2786 case Intrinsic::trap: {
2787 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::TRAP));
2788 return true;
2789 }
2790 case Intrinsic::sqrt: {
2791 if (!Subtarget->hasSSE1())
2792 return false;
2793
2794 Type *RetTy = II->getCalledFunction()->getReturnType();
2795
2796 MVT VT;
2797 if (!isTypeLegal(RetTy, VT))
2798 return false;
2799
2800 // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
2801 // is not generated by FastISel yet.
2802 // FIXME: Update this code once tablegen can handle it.
2803 static const uint16_t SqrtOpc[3][2] = {
2804 { X86::SQRTSSr, X86::SQRTSDr },
2805 { X86::VSQRTSSr, X86::VSQRTSDr },
2806 { X86::VSQRTSSZr, X86::VSQRTSDZr },
2807 };
2808 unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
2809 Subtarget->hasAVX() ? 1 :
2810 0;
2811 unsigned Opc;
2812 switch (VT.SimpleTy) {
2813 default: return false;
2814 case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
2815 case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
2816 }
2817
2818 const Value *SrcVal = II->getArgOperand(0);
2819 Register SrcReg = getRegForValue(SrcVal);
2820
2821 if (SrcReg == 0)
2822 return false;
2823
2824 const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
2825 unsigned ImplicitDefReg = 0;
2826 if (AVXLevel > 0) {
2827 ImplicitDefReg = createResultReg(RC);
2828 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2829 TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2830 }
2831
2832 Register ResultReg = createResultReg(RC);
2833 MachineInstrBuilder MIB;
2834 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
2835 ResultReg);
2836
2837 if (ImplicitDefReg)
2838 MIB.addReg(ImplicitDefReg);
2839
2840 MIB.addReg(SrcReg);
2841
2842 updateValueMap(II, ResultReg);
2843 return true;
2844 }
2845 case Intrinsic::sadd_with_overflow:
2846 case Intrinsic::uadd_with_overflow:
2847 case Intrinsic::ssub_with_overflow:
2848 case Intrinsic::usub_with_overflow:
2849 case Intrinsic::smul_with_overflow:
2850 case Intrinsic::umul_with_overflow: {
2851 // This implements the basic lowering of the xalu with overflow intrinsics
2852 // into add/sub/mul followed by either seto or setb.
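    // For example, llvm.sadd.with.overflow.i32 becomes roughly:
    //   ADD32rr lhs, rhs   ; the result value
    //   SETCCr (SETO)      ; the overflow bit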
2853 const Function *Callee = II->getCalledFunction();
2854 auto *Ty = cast<StructType>(Callee->getReturnType());
2855 Type *RetTy = Ty->getTypeAtIndex(0U);
2856 assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
2857 Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
2858 "Overflow value expected to be an i1");
2859
2860 MVT VT;
2861 if (!isTypeLegal(RetTy, VT))
2862 return false;
2863
2864 if (VT < MVT::i8 || VT > MVT::i64)
2865 return false;
2866
2867 const Value *LHS = II->getArgOperand(0);
2868 const Value *RHS = II->getArgOperand(1);
2869
2870 // Canonicalize immediate to the RHS.
2871 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
2872 std::swap(LHS, RHS);
2873
2874 unsigned BaseOpc, CondCode;
2875 switch (II->getIntrinsicID()) {
2876 default: llvm_unreachable("Unexpected intrinsic!");
2877 case Intrinsic::sadd_with_overflow:
2878 BaseOpc = ISD::ADD; CondCode = X86::COND_O; break;
2879 case Intrinsic::uadd_with_overflow:
2880 BaseOpc = ISD::ADD; CondCode = X86::COND_B; break;
2881 case Intrinsic::ssub_with_overflow:
2882 BaseOpc = ISD::SUB; CondCode = X86::COND_O; break;
2883 case Intrinsic::usub_with_overflow:
2884 BaseOpc = ISD::SUB; CondCode = X86::COND_B; break;
2885 case Intrinsic::smul_with_overflow:
2886 BaseOpc = X86ISD::SMUL; CondCode = X86::COND_O; break;
2887 case Intrinsic::umul_with_overflow:
2888 BaseOpc = X86ISD::UMUL; CondCode = X86::COND_O; break;
2889 }
2890
2891 Register LHSReg = getRegForValue(LHS);
2892 if (LHSReg == 0)
2893 return false;
2894
2895 unsigned ResultReg = 0;
2896 // Check if we have an immediate version.
2897 if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
2898 static const uint16_t Opc[2][4] = {
2899 { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
2900 { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
2901 };
2902
2903 if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
2904 CondCode == X86::COND_O) {
2905 // We can use INC/DEC.
2906 ResultReg = createResultReg(TLI.getRegClassFor(VT));
2907 bool IsDec = BaseOpc == ISD::SUB;
2908 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2909 TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
2910 .addReg(LHSReg);
2911 } else
2912 ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, CI->getZExtValue());
2913 }
2914
2915 unsigned RHSReg;
2916 if (!ResultReg) {
2917 RHSReg = getRegForValue(RHS);
2918 if (RHSReg == 0)
2919 return false;
2920 ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, RHSReg);
2921 }
2922
2923 // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
2924 // it manually.
2925 if (BaseOpc == X86ISD::UMUL && !ResultReg) {
2926 static const uint16_t MULOpc[] =
2927 { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
2928 static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
2929 // First copy the first operand into RAX, which is an implicit input to
2930 // the X86::MUL*r instruction.
2931 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2932 TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
2933 .addReg(LHSReg);
2934 ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
2935 TLI.getRegClassFor(VT), RHSReg);
2936 } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
2937 static const uint16_t MULOpc[] =
2938 { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
2939 if (VT == MVT::i8) {
2940 // Copy the first operand into AL, which is an implicit input to the
2941 // X86::IMUL8r instruction.
2942 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2943 TII.get(TargetOpcode::COPY), X86::AL)
2944 .addReg(LHSReg);
2945 ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg);
2946 } else
2947 ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
2948 TLI.getRegClassFor(VT), LHSReg, RHSReg);
2949 }
2950
2951 if (!ResultReg)
2952 return false;
2953
2954 // Assign to a GPR since the overflow return value is lowered to a SETcc.
2955 Register ResultReg2 = createResultReg(&X86::GR8RegClass);
2956 assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
2957 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::SETCCr),
2958 ResultReg2).addImm(CondCode);
2959
2960 updateValueMap(II, ResultReg, 2);
2961 return true;
2962 }
2963 case Intrinsic::x86_sse_cvttss2si:
2964 case Intrinsic::x86_sse_cvttss2si64:
2965 case Intrinsic::x86_sse2_cvttsd2si:
2966 case Intrinsic::x86_sse2_cvttsd2si64: {
2967 bool IsInputDouble;
2968 switch (II->getIntrinsicID()) {
2969 default: llvm_unreachable("Unexpected intrinsic.");
2970 case Intrinsic::x86_sse_cvttss2si:
2971 case Intrinsic::x86_sse_cvttss2si64:
2972 if (!Subtarget->hasSSE1())
2973 return false;
2974 IsInputDouble = false;
2975 break;
2976 case Intrinsic::x86_sse2_cvttsd2si:
2977 case Intrinsic::x86_sse2_cvttsd2si64:
2978 if (!Subtarget->hasSSE2())
2979 return false;
2980 IsInputDouble = true;
2981 break;
2982 }
2983
2984 Type *RetTy = II->getCalledFunction()->getReturnType();
2985 MVT VT;
2986 if (!isTypeLegal(RetTy, VT))
2987 return false;
2988
2989 static const uint16_t CvtOpc[3][2][2] = {
2990 { { X86::CVTTSS2SIrr, X86::CVTTSS2SI64rr },
2991 { X86::CVTTSD2SIrr, X86::CVTTSD2SI64rr } },
2992 { { X86::VCVTTSS2SIrr, X86::VCVTTSS2SI64rr },
2993 { X86::VCVTTSD2SIrr, X86::VCVTTSD2SI64rr } },
2994 { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
2995 { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
2996 };
2997 unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
2998 Subtarget->hasAVX() ? 1 :
2999 0;
3000 unsigned Opc;
3001 switch (VT.SimpleTy) {
3002 default: llvm_unreachable("Unexpected result type.");
3003 case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
3004 case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
3005 }
3006
3007 // Check if we can fold insertelement instructions into the convert.
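    // For example, if the argument is
    //   %v = insertelement <4 x float> undef, float %x, i32 0
    // the conversion can read %x directly instead of building the vector.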
3008 const Value *Op = II->getArgOperand(0);
3009 while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
3010 const Value *Index = IE->getOperand(2);
3011 if (!isa<ConstantInt>(Index))
3012 break;
3013 unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
3014
3015 if (Idx == 0) {
3016 Op = IE->getOperand(1);
3017 break;
3018 }
3019 Op = IE->getOperand(0);
3020 }
3021
3022 Register Reg = getRegForValue(Op);
3023 if (Reg == 0)
3024 return false;
3025
3026 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3027 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3028 .addReg(Reg);
3029
3030 updateValueMap(II, ResultReg);
3031 return true;
3032 }
3033 }
3034}
3035
3036bool X86FastISel::fastLowerArguments() {
3037 if (!FuncInfo.CanLowerReturn)
3038 return false;
3039
3040 const Function *F = FuncInfo.Fn;
3041 if (F->isVarArg())
3042 return false;
3043
3044 CallingConv::ID CC = F->getCallingConv();
3045 if (CC != CallingConv::C)
3046 return false;
3047
3048 if (Subtarget->isCallingConvWin64(CC))
3049 return false;
3050
3051 if (!Subtarget->is64Bit())
3052 return false;
3053
3054 if (Subtarget->useSoftFloat())
3055 return false;
3056
3057 // Only handle simple cases, i.e. up to 6 i32/i64 scalar arguments in GPRs and up to 8 f32/f64 arguments in XMM registers.
3058 unsigned GPRCnt = 0;
3059 unsigned FPRCnt = 0;
3060 for (auto const &Arg : F->args()) {
3061 if (Arg.hasAttribute(Attribute::ByVal) ||
3062 Arg.hasAttribute(Attribute::InReg) ||
3063 Arg.hasAttribute(Attribute::StructRet) ||
3064 Arg.hasAttribute(Attribute::SwiftSelf) ||
3065 Arg.hasAttribute(Attribute::SwiftAsync) ||
3066 Arg.hasAttribute(Attribute::SwiftError) ||
3067 Arg.hasAttribute(Attribute::Nest))
3068 return false;
3069
3070 Type *ArgTy = Arg.getType();
3071 if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3072 return false;
3073
3074 EVT ArgVT = TLI.getValueType(DL, ArgTy);
3075 if (!ArgVT.isSimple()) return false;
3076 switch (ArgVT.getSimpleVT().SimpleTy) {
3077 default: return false;
3078 case MVT::i32:
3079 case MVT::i64:
3080 ++GPRCnt;
3081 break;
3082 case MVT::f32:
3083 case MVT::f64:
3084 if (!Subtarget->hasSSE1())
3085 return false;
3086 ++FPRCnt;
3087 break;
3088 }
3089
3090 if (GPRCnt > 6)
3091 return false;
3092
3093 if (FPRCnt > 8)
3094 return false;
3095 }
3096
3097 static const MCPhysReg GPR32ArgRegs[] = {
3098 X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
3099 };
3100 static const MCPhysReg GPR64ArgRegs[] = {
3101 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
3102 };
3103 static const MCPhysReg XMMArgRegs[] = {
3104 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3105 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3106 };
3107
3108 unsigned GPRIdx = 0;
3109 unsigned FPRIdx = 0;
3110 for (auto const &Arg : F->args()) {
3111 MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
3112 const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
3113 unsigned SrcReg;
3114 switch (VT.SimpleTy) {
3115 default: llvm_unreachable("Unexpected value type.");
3116 case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
3117 case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
3118 case MVT::f32: [[fallthrough]];
3119 case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
3120 }
3121 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3122 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3123 // Without this, EmitLiveInCopies may eliminate the livein if its only
3124 // use is a bitcast (which isn't turned into an instruction).
3125 Register ResultReg = createResultReg(RC);
3126 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3127 TII.get(TargetOpcode::COPY), ResultReg)
3128 .addReg(DstReg, getKillRegState(true));
3129 updateValueMap(&Arg, ResultReg);
3130 }
3131 return true;
3132}
3133
3134static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
3135 CallingConv::ID CC,
3136 const CallBase *CB) {
3137 if (Subtarget->is64Bit())
3138 return 0;
3139 if (Subtarget->getTargetTriple().isOSMSVCRT())
3140 return 0;
3141 if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3142 CC == CallingConv::HiPE || CC == CallingConv::Tail ||
3143 CC == CallingConv::SwiftTail)
3144 return 0;
3145
3146 if (CB)
3147 if (CB->arg_empty() || !CB->paramHasAttr(0, Attribute::StructRet) ||
3148 CB->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
3149 return 0;
3150
3151 return 4;
3152}
3153
3154bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3155 auto &OutVals = CLI.OutVals;
3156 auto &OutFlags = CLI.OutFlags;
3157 auto &OutRegs = CLI.OutRegs;
3158 auto &Ins = CLI.Ins;
3159 auto &InRegs = CLI.InRegs;
3160 CallingConv::ID CC = CLI.CallConv;
3161 bool &IsTailCall = CLI.IsTailCall;
3162 bool IsVarArg = CLI.IsVarArg;
3163 const Value *Callee = CLI.Callee;
3164 MCSymbol *Symbol = CLI.Symbol;
3165 const auto *CB = CLI.CB;
3166
3167 bool Is64Bit = Subtarget->is64Bit();
3168 bool IsWin64 = Subtarget->isCallingConvWin64(CC);
3169
3170 // Call / invoke instructions with NoCfCheck attribute require special
3171 // handling.
3172 if (CB && CB->doesNoCfCheck())
3173 return false;
3174
3175 // Functions with no_caller_saved_registers need special handling.
3176 if ((CB && isa<CallInst>(CB) && CB->hasFnAttr("no_caller_saved_registers")))
3177 return false;
3178
3179 // Functions with no_callee_saved_registers need special handling.
3180 if ((CB && CB->hasFnAttr("no_callee_saved_registers")))
3181 return false;
3182
3183 // Indirect calls with CFI checks need special handling.
3184 if (CB && CB->isIndirectCall() && CB->getOperandBundle(LLVMContext::OB_kcfi))
3185 return false;
3186
3187 // Functions using thunks for indirect calls need to use SDISel.
3188 if (Subtarget->useIndirectThunkCalls())
3189 return false;
3190
3191 // Handle only C, fastcc, and webkit_js calling conventions for now.
3192 switch (CC) {
3193 default: return false;
3194 case CallingConv::C:
3195 case CallingConv::Fast:
3196 case CallingConv::Tail:
3197 case CallingConv::WebKit_JS:
3198 case CallingConv::Swift:
3199 case CallingConv::SwiftTail:
3200 case CallingConv::X86_FastCall:
3201 case CallingConv::X86_StdCall:
3202 case CallingConv::X86_ThisCall:
3203 case CallingConv::Win64:
3204 case CallingConv::X86_64_SysV:
3205 case CallingConv::CFGuard_Check:
3206 break;
3207 }
3208
3209 // Allow SelectionDAG isel to handle tail calls.
3210 if (IsTailCall)
3211 return false;
3212
3213 // fastcc with -tailcallopt is intended to provide a guaranteed
3214 // tail call optimization. Fastisel doesn't know how to do that.
3215 if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) ||
3216 CC == CallingConv::Tail || CC == CallingConv::SwiftTail)
3217 return false;
3218
3219 // Don't know how to handle Win64 varargs yet. Nothing special needed for
3220 // x86-32. Special handling for x86-64 is implemented.
3221 if (IsVarArg && IsWin64)
3222 return false;
3223
3224 // Don't know about inalloca yet.
3225 if (CLI.CB && CLI.CB->hasInAllocaArgument())
3226 return false;
3227
3228 for (auto Flag : CLI.OutFlags)
3229 if (Flag.isSwiftError() || Flag.isPreallocated())
3230 return false;
3231
3232 SmallVector<MVT, 16> OutVTs;
3233 SmallVector<unsigned, 16> ArgRegs;
3234
3235 // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
3236 // instruction. This is safe because it is common to all FastISel supported
3237 // calling conventions on x86.
3238 for (int i = 0, e = OutVals.size(); i != e; ++i) {
3239 Value *&Val = OutVals[i];
3240 ISD::ArgFlagsTy Flags = OutFlags[i];
3241 if (auto *CI = dyn_cast<ConstantInt>(Val)) {
3242 if (CI->getBitWidth() < 32) {
3243 if (Flags.isSExt())
3244 Val = ConstantInt::get(CI->getContext(), CI->getValue().sext(32));
3245 else
3246 Val = ConstantInt::get(CI->getContext(), CI->getValue().zext(32));
3247 }
3248 }
3249
3250 // Passing bools around ends up doing a trunc to i1 and passing it.
3251 // Codegen this as an argument + "and 1".
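    // For example, for "%c = trunc i8 %x to i1" passed as a call argument,
    // the original i8 value is ANDed with 1 and passed directly, avoiding a
    // separate truncation.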
3252 MVT VT;
3253 auto *TI = dyn_cast<TruncInst>(Val);
3254 unsigned ResultReg;
3255 if (TI && TI->getType()->isIntegerTy(1) && CLI.CB &&
3256 (TI->getParent() == CLI.CB->getParent()) && TI->hasOneUse()) {
3257 Value *PrevVal = TI->getOperand(0);
3258 ResultReg = getRegForValue(PrevVal);
3259
3260 if (!ResultReg)
3261 return false;
3262
3263 if (!isTypeLegal(PrevVal->getType(), VT))
3264 return false;
3265
3266 ResultReg = fastEmit_ri(VT, VT, ISD::AND, ResultReg, 1);
3267 } else {
3268 if (!isTypeLegal(Val->getType(), VT) ||
3269 (VT.isVector() && VT.getVectorElementType() == MVT::i1))
3270 return false;
3271 ResultReg = getRegForValue(Val);
3272 }
3273
3274 if (!ResultReg)
3275 return false;
3276
3277 ArgRegs.push_back(ResultReg);
3278 OutVTs.push_back(VT);
3279 }
3280
3281 // Analyze operands of the call, assigning locations to each operand.
3282 SmallVector<CCValAssign, 16> ArgLocs;
3283 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
3284
3285 // Allocate shadow area for Win64
3286 if (IsWin64)
3287 CCInfo.AllocateStack(32, Align(8));
3288
3289 CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
3290
3291 // Get a count of how many bytes are to be pushed on the stack.
3292 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3293
3294 // Issue CALLSEQ_START
3295 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3296 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3297 .addImm(NumBytes).addImm(0).addImm(0);
3298
3299 // Walk the register/memloc assignments, inserting copies/loads.
3300 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3301 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3302 CCValAssign const &VA = ArgLocs[i];
3303 const Value *ArgVal = OutVals[VA.getValNo()];
3304 MVT ArgVT = OutVTs[VA.getValNo()];
3305
3306 if (ArgVT == MVT::x86mmx)
3307 return false;
3308
3309 unsigned ArgReg = ArgRegs[VA.getValNo()];
3310
3311 // Promote the value if needed.
3312 switch (VA.getLocInfo()) {
3313 case CCValAssign::Full: break;
3314 case CCValAssign::SExt: {
3315 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3316 "Unexpected extend");
3317
3318 if (ArgVT == MVT::i1)
3319 return false;
3320
3321 bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3322 ArgVT, ArgReg);
3323 assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
3324 ArgVT = VA.getLocVT();
3325 break;
3326 }
3327 case CCValAssign::ZExt: {
3328 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3329 "Unexpected extend");
3330
3331 // Handle zero-extension from i1 to i8, which is common.
3332 if (ArgVT == MVT::i1) {
3333 // Set the high bits to zero.
3334 ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg);
3335 ArgVT = MVT::i8;
3336
3337 if (ArgReg == 0)
3338 return false;
3339 }
3340
3341 bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3342 ArgVT, ArgReg);
3343 assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
3344 ArgVT = VA.getLocVT();
3345 break;
3346 }
3347 case CCValAssign::AExt: {
3348 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3349 "Unexpected extend");
3350 bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
3351 ArgVT, ArgReg);
3352 if (!Emitted)
3353 Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
3354 ArgVT, ArgReg);
3355 if (!Emitted)
3356 Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3357 ArgVT, ArgReg);
3358
3359 assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
3360 ArgVT = VA.getLocVT();
3361 break;
3362 }
3363 case CCValAssign::BCvt: {
3364 ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg);
3365 assert(ArgReg && "Failed to emit a bitcast!");
3366 ArgVT = VA.getLocVT();
3367 break;
3368 }
3369 case CCValAssign::VExt:
3370 // VExt has not been implemented, so this should be impossible to reach
3371 // for now. However, fall back to SelectionDAG isel once it is implemented.
3372 return false;
3373 case CCValAssign::AExtUpper:
3374 case CCValAssign::SExtUpper:
3375 case CCValAssign::ZExtUpper:
3376 case CCValAssign::FPExt:
3377 case CCValAssign::Trunc:
3378 llvm_unreachable("Unexpected loc info!");
3379 case CCValAssign::Indirect:
3380 // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
3381 // support this.
3382 return false;
3383 }
3384
3385 if (VA.isRegLoc()) {
3386 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3387 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3388 OutRegs.push_back(VA.getLocReg());
3389 } else {
3390 assert(VA.isMemLoc() && "Unknown value location!");
3391
3392 // Don't emit stores for undef values.
3393 if (isa<UndefValue>(ArgVal))
3394 continue;
3395
3396 unsigned LocMemOffset = VA.getLocMemOffset();
3397 X86AddressMode AM;
3398 AM.Base.Reg = RegInfo->getStackRegister();
3399 AM.Disp = LocMemOffset;
3400 ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
3401 Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3402 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3403 MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
3404 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3405 if (Flags.isByVal()) {
3406 X86AddressMode SrcAM;
3407 SrcAM.Base.Reg = ArgReg;
3408 if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
3409 return false;
3410 } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
3411 // If this is a really simple value, emit this with the Value* version
3412 // of X86FastEmitStore. If it isn't simple, we don't want to do this,
3413 // as it can cause us to reevaluate the argument.
3414 if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
3415 return false;
3416 } else {
3417 if (!X86FastEmitStore(ArgVT, ArgReg, AM, MMO))
3418 return false;
3419 }
3420 }
3421 }
3422
3423 // ELF / PIC requires the GOT pointer to be loaded into the EBX register
3424 // before making function calls via the PLT.
3425 if (Subtarget->isPICStyleGOT()) {
3426 unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3427 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3428 TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
3429 }
3430
3431 if (Is64Bit && IsVarArg && !IsWin64) {
3432 // From AMD64 ABI document:
3433 // For calls that may call functions that use varargs or stdargs
3434 // (prototype-less calls or calls to functions containing ellipsis (...) in
3435 // the declaration) %al is used as hidden argument to specify the number
3436 // of SSE registers used. The contents of %al do not need to match exactly
3437 // the number of registers, but must be an upper bound on the number of SSE
3438 // registers used and is in the range 0 - 8 inclusive.
3439
3440 // Count the number of XMM registers allocated.
3441 static const MCPhysReg XMMArgRegs[] = {
3442 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3443 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3444 };
3445 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3446 assert((Subtarget->hasSSE1() || !NumXMMRegs)
3447 && "SSE registers cannot be used when SSE is disabled");
3448 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV8ri),
3449 X86::AL).addImm(NumXMMRegs);
3450 }
3451
3452 // Materialize callee address in a register. FIXME: GV address can be
3453 // handled with a CALLpcrel32 instead.
3454 X86AddressMode CalleeAM;
3455 if (!X86SelectCallAddress(Callee, CalleeAM))
3456 return false;
3457
3458 unsigned CalleeOp = 0;
3459 const GlobalValue *GV = nullptr;
3460 if (CalleeAM.GV != nullptr) {
3461 GV = CalleeAM.GV;
3462 } else if (CalleeAM.Base.Reg != 0) {
3463 CalleeOp = CalleeAM.Base.Reg;
3464 } else
3465 return false;
3466
3467 // Issue the call.
3468 MachineInstrBuilder MIB;
3469 if (CalleeOp) {
3470 // Register-indirect call.
3471 unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
3472 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CallOpc))
3473 .addReg(CalleeOp);
3474 } else {
3475 // Direct call.
3476 assert(GV && "Not a direct call");
3477 // See if we need any target-specific flags on the GV operand.
3478 unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
3479
3480 // This will be a direct call, or an indirect call through memory for
3481 // NonLazyBind calls or dllimport calls.
3482 bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT ||
3483 OpFlags == X86II::MO_GOTPCREL ||
3484 OpFlags == X86II::MO_GOTPCREL_NORELAX ||
3485 OpFlags == X86II::MO_COFFSTUB;
3486 unsigned CallOpc = NeedLoad
3487 ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
3488 : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
3489
3490 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CallOpc));
3491 if (NeedLoad)
3492 MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
3493 if (Symbol)
3494 MIB.addSym(Symbol, OpFlags);
3495 else
3496 MIB.addGlobalAddress(GV, 0, OpFlags);
3497 if (NeedLoad)
3498 MIB.addReg(0);
3499 }
3500
3501 // Add a register mask operand representing the call-preserved registers.
3502 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3503 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3504
3505 // Add an implicit use GOT pointer in EBX.
3506 if (Subtarget->isPICStyleGOT())
3507 MIB.addReg(X86::EBX, RegState::Implicit);
3508
3509 if (Is64Bit && IsVarArg && !IsWin64)
3510 MIB.addReg(X86::AL, RegState::Implicit);
3511
3512 // Add implicit physical register uses to the call.
3513 for (auto Reg : OutRegs)
3514 MIB.addReg(Reg, RegState::Implicit);
3515
3516 // Issue CALLSEQ_END
3517 unsigned NumBytesForCalleeToPop =
3518 X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
3519 TM.Options.GuaranteedTailCallOpt)
3520 ? NumBytes // Callee pops everything.
3521 : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CB);
3522 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3523 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3524 .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
3525
3526 // Now handle call return values.
3527 SmallVector<CCValAssign, 16> RVLocs;
3528 CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
3529 CLI.RetTy->getContext());
3530 CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
3531
3532 // Copy all of the result registers out of their specified physreg.
3533 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3534 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3535 CCValAssign &VA = RVLocs[i];
3536 EVT CopyVT = VA.getValVT();
3537 unsigned CopyReg = ResultReg + i;
3538 Register SrcReg = VA.getLocReg();
3539
3540 // If this is x86-64, and we disabled SSE, we can't return FP values
3541 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
3542 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
3543 report_fatal_error("SSE register return with SSE disabled");
3544 }
3545
3546 // If we prefer to use the value in xmm registers, copy it out as f80 and
3547 // use a truncate to move it from fp stack reg to xmm reg.
3548 if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
3549 isScalarFPTypeInSSEReg(VA.getValVT())) {
3550 CopyVT = MVT::f80;
3551 CopyReg = createResultReg(&X86::RFP80RegClass);
3552 }
3553
3554 // Copy out the result.
3555 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3556 TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
3557 InRegs.push_back(VA.getLocReg());
3558
3559 // Round the f80 to the right size, which also moves it to the appropriate
3560 // xmm register. This is accomplished by storing the f80 value in memory
3561 // and then loading it back.
3562 if (CopyVT != VA.getValVT()) {
3563 EVT ResVT = VA.getValVT();
3564 unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3565 unsigned MemSize = ResVT.getSizeInBits()/8;
3566 int FI = MFI.CreateStackObject(MemSize, Align(MemSize), false);
3567 addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3568 TII.get(Opc)), FI)
3569 .addReg(CopyReg);
3570 Opc = ResVT == MVT::f32 ? X86::MOVSSrm_alt : X86::MOVSDrm_alt;
3571 addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3572 TII.get(Opc), ResultReg + i), FI);
3573 }
3574 }
3575
3576 CLI.ResultReg = ResultReg;
3577 CLI.NumResultRegs = RVLocs.size();
3578 CLI.Call = MIB;
3579
3580 return true;
3581}
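// Illustrative sketch (not part of the original file): the AMD64 SysV ABI rule
// restated in the varargs comment inside fastLowerCall above is that %al must
// carry an upper bound, between 0 and 8 inclusive, on the number of XMM
// registers the call actually uses. The helper below is hypothetical and only
// mirrors that clamping; the real code takes the count from
// CCInfo.getFirstUnallocated(XMMArgRegs).
static unsigned clampXMMCountForVarArgAL(unsigned NumXMMRegsUsed) {
  // Any value >= the true count is a legal hint; 8 is the largest meaningful
  // bound because only XMM0-XMM7 carry arguments in this convention.
  return NumXMMRegsUsed > 8 ? 8u : NumXMMRegsUsed;
}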
3582
3583bool
3584X86FastISel::fastSelectInstruction(const Instruction *I) {
3585 switch (I->getOpcode()) {
3586 default: break;
3587 case Instruction::Load:
3588 return X86SelectLoad(I);
3589 case Instruction::Store:
3590 return X86SelectStore(I);
3591 case Instruction::Ret:
3592 return X86SelectRet(I);
3593 case Instruction::ICmp:
3594 case Instruction::FCmp:
3595 return X86SelectCmp(I);
3596 case Instruction::ZExt:
3597 return X86SelectZExt(I);
3598 case Instruction::SExt:
3599 return X86SelectSExt(I);
3600 case Instruction::Br:
3601 return X86SelectBranch(I);
3602 case Instruction::LShr:
3603 case Instruction::AShr:
3604 case Instruction::Shl:
3605 return X86SelectShift(I);
3606 case Instruction::SDiv:
3607 case Instruction::UDiv:
3608 case Instruction::SRem:
3609 case Instruction::URem:
3610 return X86SelectDivRem(I);
3611 case Instruction::Select:
3612 return X86SelectSelect(I);
3613 case Instruction::Trunc:
3614 return X86SelectTrunc(I);
3615 case Instruction::FPExt:
3616 return X86SelectFPExt(I);
3617 case Instruction::FPTrunc:
3618 return X86SelectFPTrunc(I);
3619 case Instruction::SIToFP:
3620 return X86SelectSIToFP(I);
3621 case Instruction::UIToFP:
3622 return X86SelectUIToFP(I);
3623 case Instruction::IntToPtr: // Deliberate fall-through.
3624 case Instruction::PtrToInt: {
3625 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3626 EVT DstVT = TLI.getValueType(DL, I->getType());
3627 if (DstVT.bitsGT(SrcVT))
3628 return X86SelectZExt(I);
3629 if (DstVT.bitsLT(SrcVT))
3630 return X86SelectTrunc(I);
3631 Register Reg = getRegForValue(I->getOperand(0));
3632 if (Reg == 0) return false;
3633 updateValueMap(I, Reg);
3634 return true;
3635 }
3636 case Instruction::BitCast: {
3637 // Select SSE2/AVX bitcasts between 128/256/512 bit vector types.
3638 if (!Subtarget->hasSSE2())
3639 return false;
3640
3641 MVT SrcVT, DstVT;
3642 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT) ||
3643 !isTypeLegal(I->getType(), DstVT))
3644 return false;
3645
3646 // Only allow vectors that use xmm/ymm/zmm.
3647 if (!SrcVT.isVector() || !DstVT.isVector() ||
3648 SrcVT.getVectorElementType() == MVT::i1 ||
3649 DstVT.getVectorElementType() == MVT::i1)
3650 return false;
3651
3652 Register Reg = getRegForValue(I->getOperand(0));
3653 if (!Reg)
3654 return false;
3655
3656 // Emit a reg-reg copy so we don't propagate cached known bits information
3657 // with the wrong VT if we fall out of fast isel after selecting this.
3658 const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
3659 Register ResultReg = createResultReg(DstClass);
3660 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3661 TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg);
3662
3663 updateValueMap(I, ResultReg);
3664 return true;
3665 }
3666 }
3667
3668 return false;
3669}
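// Illustrative sketch (not part of the original file): the IntToPtr/PtrToInt
// arm of fastSelectInstruction above reduces a pointer<->integer cast to a
// bit-width comparison: widen, narrow, or simply reuse the source register.
// The enum and helper names below are hypothetical and restate that decision
// outside of the FastISel machinery.
enum class CastLowering { ZeroExtend, Truncate, ReuseRegister };

static CastLowering classifyIntPtrCast(unsigned SrcBits, unsigned DstBits) {
  if (DstBits > SrcBits)
    return CastLowering::ZeroExtend;  // handled like a ZExt
  if (DstBits < SrcBits)
    return CastLowering::Truncate;    // handled like a Trunc
  return CastLowering::ReuseRegister; // same width: reuse the existing vreg
}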
3670
3671unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
3672 if (VT > MVT::i64)
3673 return 0;
3674
3675 uint64_t Imm = CI->getZExtValue();
3676 if (Imm == 0) {
3677 Register SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
3678 switch (VT.SimpleTy) {
3679 default: llvm_unreachable("Unexpected value type");
3680 case MVT::i1:
3681 case MVT::i8:
3682 return fastEmitInst_extractsubreg(MVT::i8, SrcReg, X86::sub_8bit);
3683 case MVT::i16:
3684 return fastEmitInst_extractsubreg(MVT::i16, SrcReg, X86::sub_16bit);
3685 case MVT::i32:
3686 return SrcReg;
3687 case MVT::i64: {
3688 Register ResultReg = createResultReg(&X86::GR64RegClass);
3689 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3690 TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
3691 .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
3692 return ResultReg;
3693 }
3694 }
3695 }
3696
3697 unsigned Opc = 0;
3698 switch (VT.SimpleTy) {
3699 default: llvm_unreachable("Unexpected value type");
3700 case MVT::i1:
3701 VT = MVT::i8;
3702 [[fallthrough]];
3703 case MVT::i8: Opc = X86::MOV8ri; break;
3704 case MVT::i16: Opc = X86::MOV16ri; break;
3705 case MVT::i32: Opc = X86::MOV32ri; break;
3706 case MVT::i64: {
3707 if (isUInt<32>(Imm))
3708 Opc = X86::MOV32ri64;
3709 else if (isInt<32>(Imm))
3710 Opc = X86::MOV64ri32;
3711 else
3712 Opc = X86::MOV64ri;
3713 break;
3714 }
3715 }
3716 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
3717}
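// Illustrative sketch (not part of the original file): the MVT::i64 arm of
// X86MaterializeInt above picks the cheapest move for a 64-bit constant. The
// hypothetical helper below mirrors the isUInt<32>/isInt<32> checks using
// plain arithmetic.
enum class Imm64Move {
  Mov32ZeroExtended, // fits in an unsigned 32-bit immediate (MOV32ri64)
  Mov64SignExtended, // fits in a signed 32-bit immediate   (MOV64ri32)
  Mov64Full          // needs the full 64-bit immediate form (MOV64ri)
};

static Imm64Move classifyImm64(unsigned long long Imm) {
  if (Imm <= 0xFFFFFFFFULL)
    return Imm64Move::Mov32ZeroExtended;
  long long Signed = static_cast<long long>(Imm);
  if (Signed >= -2147483647LL - 1 && Signed <= 2147483647LL)
    return Imm64Move::Mov64SignExtended;
  return Imm64Move::Mov64Full;
}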
3718
3719unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
3720 if (CFP->isNullValue())
3721 return fastMaterializeFloatZero(CFP);
3722
3723 // Can't handle alternate code models yet.
3724 CodeModel::Model CM = TM.getCodeModel();
3725 if (CM != CodeModel::Small && CM != CodeModel::Large)
3726 return 0;
3727
3728 // Get opcode and regclass of the output for the given load instruction.
3729 unsigned Opc = 0;
3730 bool HasSSE1 = Subtarget->hasSSE1();
3731 bool HasSSE2 = Subtarget->hasSSE2();
3732 bool HasAVX = Subtarget->hasAVX();
3733 bool HasAVX512 = Subtarget->hasAVX512();
3734 switch (VT.SimpleTy) {
3735 default: return 0;
3736 case MVT::f32:
3737 Opc = HasAVX512 ? X86::VMOVSSZrm_alt
3738 : HasAVX ? X86::VMOVSSrm_alt
3739 : HasSSE1 ? X86::MOVSSrm_alt
3740 : X86::LD_Fp32m;
3741 break;
3742 case MVT::f64:
3743 Opc = HasAVX512 ? X86::VMOVSDZrm_alt
3744 : HasAVX ? X86::VMOVSDrm_alt
3745 : HasSSE2 ? X86::MOVSDrm_alt
3746 : X86::LD_Fp64m;
3747 break;
3748 case MVT::f80:
3749 // No f80 support yet.
3750 return 0;
3751 }
3752
3753 // MachineConstantPool wants an explicit alignment.
3754 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
3755
3756 // x86-32 PIC requires a PIC base register for constant pools.
3757 unsigned PICBase = 0;
3758 unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
3759 if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
3760 PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3761 else if (OpFlag == X86II::MO_GOTOFF)
3762 PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
3763 else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
3764 PICBase = X86::RIP;
3765
3766 // Create the load from the constant pool.
3767 unsigned CPI = MCP.getConstantPoolIndex(CFP, Alignment);
3768 Register ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
3769
3770 // Large code model only applies to 64-bit mode.
3771 if (Subtarget->is64Bit() && CM == CodeModel::Large) {
3772 Register AddrReg = createResultReg(&X86::GR64RegClass);
3773 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV64ri),
3774 AddrReg)
3775 .addConstantPoolIndex(CPI, 0, OpFlag);
3776 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3777 TII.get(Opc), ResultReg);
3778 addRegReg(MIB, AddrReg, false, PICBase, false);
3779 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3780 MachinePointerInfo::getConstantPool(*FuncInfo.MF),
3781 MachineMemOperand::MOLoad, DL.getPointerSize(), Alignment);
3782 MIB->addMemOperand(*FuncInfo.MF, MMO);
3783 return ResultReg;
3784 }
3785
3786 addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3787 TII.get(Opc), ResultReg),
3788 CPI, PICBase, OpFlag);
3789 return ResultReg;
3790}
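// Illustrative sketch (not part of the original file): X86MaterializeFP above
// and fastMaterializeFloatZero below both choose an opcode by walking the ISA
// feature ladder from the newest extension down to the x87 fallback. The enum
// and helper names are hypothetical and only restate that priority order.
enum class FpLoadKind { Avx512, Avx, Sse, X87 };

static FpLoadKind pickScalarFpLoad(bool HasAVX512, bool HasAVX, bool HasSSE) {
  if (HasAVX512) return FpLoadKind::Avx512; // e.g. VMOVSSZrm_alt / VMOVSDZrm_alt
  if (HasAVX)    return FpLoadKind::Avx;    // e.g. VMOVSSrm_alt / VMOVSDrm_alt
  if (HasSSE)    return FpLoadKind::Sse;    // e.g. MOVSSrm_alt / MOVSDrm_alt
  return FpLoadKind::X87;                   // e.g. LD_Fp32m / LD_Fp64m
}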
3791
3792unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
3793 // Can't handle alternate code models yet.
3794 if (TM.getCodeModel() != CodeModel::Small)
3795 return 0;
3796
3797 // Materialize addresses with LEA/MOV instructions.
3798 X86AddressMode AM;
3799 if (X86SelectAddress(GV, AM)) {
3800 // If the expression is just a basereg, then we're done, otherwise we need
3801 // to emit an LEA.
3802 if (AM.BaseType == X86AddressMode::RegBase &&
3803 AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
3804 return AM.Base.Reg;
3805
3806 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3807 if (TM.getRelocationModel() == Reloc::Static &&
3808 TLI.getPointerTy(DL) == MVT::i64) {
3809 // The displacement could be more than 32 bits away, so we need to use
3810 // an instruction with a 64-bit immediate.
3811 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(X86::MOV64ri),
3812 ResultReg)
3813 .addGlobalAddress(GV);
3814 } else {
3815 unsigned Opc =
3816 TLI.getPointerTy(DL) == MVT::i32
3817 ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3818 : X86::LEA64r;
3819 addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3820 TII.get(Opc), ResultReg), AM);
3821 }
3822 return ResultReg;
3823 }
3824 return 0;
3825}
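// Illustrative sketch (not part of the original file): the MOV64ri path in
// X86MaterializeGV above exists because an x86 addressing-mode displacement is
// a signed 32-bit field, so a static 64-bit absolute address that may not fit
// in that field has to be materialized with a 64-bit immediate move rather
// than an LEA. This hypothetical helper just states the range check.
static bool fitsInSigned32BitDisplacement(long long Disp) {
  return Disp >= -2147483647LL - 1 && Disp <= 2147483647LL;
}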
3826
3827unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
3828 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
3829
3830 // Only handle simple types.
3831 if (!CEVT.isSimple())
3832 return 0;
3833 MVT VT = CEVT.getSimpleVT();
3834
3835 if (const auto *CI = dyn_cast<ConstantInt>(C))
3836 return X86MaterializeInt(CI, VT);
3837 if (const auto *CFP = dyn_cast<ConstantFP>(C))
3838 return X86MaterializeFP(CFP, VT);
3839 if (const auto *GV = dyn_cast<GlobalValue>(C))
3840 return X86MaterializeGV(GV, VT);
3841 if (isa<UndefValue>(C)) {
3842 unsigned Opc = 0;
3843 switch (VT.SimpleTy) {
3844 default:
3845 break;
3846 case MVT::f32:
3847 if (!Subtarget->hasSSE1())
3848 Opc = X86::LD_Fp032;
3849 break;
3850 case MVT::f64:
3851 if (!Subtarget->hasSSE2())
3852 Opc = X86::LD_Fp064;
3853 break;
3854 case MVT::f80:
3855 Opc = X86::LD_Fp080;
3856 break;
3857 }
3858
3859 if (Opc) {
3860 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3861 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
3862 ResultReg);
3863 return ResultReg;
3864 }
3865 }
3866
3867 return 0;
3868}
3869
3870unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
3871 // Fail on dynamic allocas. At this point, getRegForValue has already
3872 // checked its CSE maps, so if we're here trying to handle a dynamic
3873 // alloca, we're not going to succeed. X86SelectAddress has a
3874 // check for dynamic allocas, because it's called directly from
3875 // various places, but targetMaterializeAlloca also needs a check
3876 // in order to avoid recursion between getRegForValue,
3877 // X86SelectAddress, and targetMaterializeAlloca.
3878 if (!FuncInfo.StaticAllocaMap.count(C))
3879 return 0;
3880 assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
3881
3882 X86AddressMode AM;
3883 if (!X86SelectAddress(C, AM))
3884 return 0;
3885 unsigned Opc =
3886 TLI.getPointerTy(DL) == MVT::i32
3887 ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
3888 : X86::LEA64r;
3889 const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
3890 Register ResultReg = createResultReg(RC);
3891 addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3892 TII.get(Opc), ResultReg), AM);
3893 return ResultReg;
3894}
3895
3896unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
3897 MVT VT;
3898 if (!isTypeLegal(CF->getType(), VT))
3899 return 0;
3900
3901 // Get opcode and regclass for the given zero.
3902 bool HasSSE1 = Subtarget->hasSSE1();
3903 bool HasSSE2 = Subtarget->hasSSE2();
3904 bool HasAVX512 = Subtarget->hasAVX512();
3905 unsigned Opc = 0;
3906 switch (VT.SimpleTy) {
3907 default: return 0;
3908 case MVT::f16:
3909 Opc = HasAVX512 ? X86::AVX512_FsFLD0SH : X86::FsFLD0SH;
3910 break;
3911 case MVT::f32:
3912 Opc = HasAVX512 ? X86::AVX512_FsFLD0SS
3913 : HasSSE1 ? X86::FsFLD0SS
3914 : X86::LD_Fp032;
3915 break;
3916 case MVT::f64:
3917 Opc = HasAVX512 ? X86::AVX512_FsFLD0SD
3918 : HasSSE2 ? X86::FsFLD0SD
3919 : X86::LD_Fp064;
3920 break;
3921 case MVT::f80:
3922 // No f80 support yet.
3923 return 0;
3924 }
3925
3926 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3927 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg);
3928 return ResultReg;
3929}
3930
3931
3932bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
3933 const LoadInst *LI) {
3934 const Value *Ptr = LI->getPointerOperand();
3935 X86AddressMode AM;
3936 if (!X86SelectAddress(Ptr, AM))
3937 return false;
3938
3939 const X86InstrInfo &XII = (const X86InstrInfo &)TII;
3940
3941 unsigned Size = DL.getTypeAllocSize(LI->getType());
3942
3943 SmallVector<MachineOperand, 8> AddrOps;
3944 AM.getFullAddress(AddrOps);
3945
3946 MachineInstr *Result = XII.foldMemoryOperandImpl(
3947 *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, LI->getAlign(),
3948 /*AllowCommute=*/true);
3949 if (!Result)
3950 return false;
3951
3952 // The index register could be in the wrong register class. Unfortunately,
3953 // foldMemoryOperandImpl could have commuted the instruction, so it's not enough
3954 // to just look at OpNo + the offset to the index reg. We actually need to
3955 // scan the instruction to find the index reg and see if it's in the correct reg
3956 // class.
3957 unsigned OperandNo = 0;
3958 for (MachineInstr::mop_iterator I = Result->operands_begin(),
3959 E = Result->operands_end(); I != E; ++I, ++OperandNo) {
3960 MachineOperand &MO = *I;
3961 if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
3962 continue;
3963 // Found the index reg, now try to rewrite it.
3964 Register IndexReg = constrainOperandRegClass(Result->getDesc(),
3965 MO.getReg(), OperandNo);
3966 if (IndexReg == MO.getReg())
3967 continue;
3968 MO.setReg(IndexReg);
3969 }
3970
3971 Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
3972 Result->cloneInstrSymbols(*FuncInfo.MF, *MI);
3973 MachineBasicBlock::iterator I(MI);
3974 removeDeadCode(I, std::next(I));
3975 return true;
3976}
3977
3978unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
3979 const TargetRegisterClass *RC,
3980 unsigned Op0, unsigned Op1,
3981 unsigned Op2, unsigned Op3) {
3982 const MCInstrDesc &II = TII.get(MachineInstOpcode);
3983
3984 Register ResultReg = createResultReg(RC);
3985 Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
3986 Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
3987 Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
3988 Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
3989
3990 if (II.getNumDefs() >= 1)
3991 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
3992 .addReg(Op0)
3993 .addReg(Op1)
3994 .addReg(Op2)
3995 .addReg(Op3);
3996 else {
3997 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
3998 .addReg(Op0)
3999 .addReg(Op1)
4000 .addReg(Op2)
4001 .addReg(Op3);
4002 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
4003 ResultReg)
4004 .addReg(II.implicit_defs()[0]);
4005 }
4006 return ResultReg;
4007}
4008
4009
4010namespace llvm {
4011 FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
4012 const TargetLibraryInfo *libInfo) {
4013 return new X86FastISel(funcInfo, libInfo);
4014 }
4015}