LLVM 22.0.0git
PPCFastISel.cpp
Go to the documentation of this file.
1//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the PowerPC-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// PPCGenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCISelLowering.h"
20#include "PPCSelectionDAGInfo.h"
21#include "PPCSubtarget.h"
30#include "llvm/IR/CallingConv.h"
33#include "llvm/IR/Operator.h"
35
36//===----------------------------------------------------------------------===//
37//
38// TBD:
39// fastLowerArguments: Handle simple cases.
40// PPCMaterializeGV: Handle TLS.
41// SelectCall: Handle function pointers.
42// SelectCall: Handle multi-register return values.
43// SelectCall: Optimize away nops for local calls.
44// processCallArgs: Handle bit-converted arguments.
45// finishCall: Handle multi-register return values.
46// PPCComputeAddress: Handle parameter references as FrameIndex's.
47// PPCEmitCmp: Handle immediate as operand 1.
48// SelectCall: Handle small byval arguments.
49// SelectIntrinsicCall: Implement.
50// SelectSelect: Implement.
51// Consider factoring isTypeLegal into the base class.
52// Implement switches and jump tables.
53//
54//===----------------------------------------------------------------------===//
55using namespace llvm;
56
57#define DEBUG_TYPE "ppcfastisel"
58
59namespace {
60
// Describes a memory location as either (base register + offset) or
// (frame index + offset).
struct Address {
  enum { RegBase, FrameIndexBase } BaseType;

  // Exactly one of these is meaningful, selected by BaseType.
  union {
    unsigned Reg;
    int FI;
  } Base;

  int64_t Offset;

  // Default to "register base, register 0, offset 0".
  Address() : BaseType(RegBase), Offset(0) { Base.Reg = 0; }
};
80
// PowerPC-specific FastISel implementation: overrides the generic
// FastISel hooks and adds PPC helpers for address formation,
// loads/stores, compares, extensions, and constant materialization.
class PPCFastISel final : public FastISel {

  const TargetMachine &TM;       // Target machine of the current function.
  const PPCSubtarget *Subtarget; // Feature queries (SPE, VSX, P8 vector, ...).
  PPCFunctionInfo *PPCFuncInfo;  // PPC-specific per-function state.
  const TargetInstrInfo &TII;    // Instruction descriptions for BuildMI.
  const TargetLowering &TLI;     // Type-legality and lowering queries.
  LLVMContext *Context;          // Context of the IR function being selected.

  public:
    explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
                         const TargetLibraryInfo *LibInfo)
        : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
          Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
          PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
          TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()),
          Context(&FuncInfo.Fn->getContext()) {}

  // Backend specific FastISel code.
  private:
    bool fastSelectInstruction(const Instruction *I) override;
    Register fastMaterializeConstant(const Constant *C) override;
    Register fastMaterializeAlloca(const AllocaInst *AI) override;
    bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                             const LoadInst *LI) override;
    bool fastLowerArguments() override;
    Register fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
    Register fastEmitInst_ri(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, Register Op0,
                             uint64_t Imm);
    Register fastEmitInst_r(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC, Register Op0);
    Register fastEmitInst_rr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, Register Op0,
                             Register Op1);

    bool fastLowerCall(CallLoweringInfo &CLI) override;

  // Instruction selection routines.
  private:
    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectIndirectBr(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectIToFP(const Instruction *I, bool IsSigned);
    bool SelectFPToI(const Instruction *I, bool IsSigned);
    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
    bool SelectIntExt(const Instruction *I);

  // Utility routines.
  private:
    bool isTypeLegal(Type *Ty, MVT &VT);
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool isValueAvailable(const Value *V) const;
    // True if RC is the VSX scalar double-precision register class.
    bool isVSFRCRegClass(const TargetRegisterClass *RC) const {
      return RC->getID() == PPC::VSFRCRegClassID;
    }
    // True if RC is the VSX scalar single-precision register class.
    bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
      return RC->getID() == PPC::VSSRCRegClassID;
    }
    // Emit a COPY of SrcReg into a fresh virtual register of class ToRC
    // and return the new register.
    Register copyRegToRegClass(const TargetRegisterClass *ToRC, Register SrcReg,
                               unsigned Flag = 0, unsigned SubReg = 0) {
      Register TmpReg = createResultReg(ToRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg, Flag, SubReg);
      return TmpReg;
    }
    bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt,
                    Register DestReg, const PPC::Predicate Pred);
    bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                     const TargetRegisterClass *RC, bool IsZExt = true,
                     unsigned FP64LoadOpc = PPC::LFD);
    bool PPCEmitStore(MVT VT, Register SrcReg, Address &Addr);
    bool PPCComputeAddress(const Value *Obj, Address &Addr);
    void PPCSimplifyAddress(Address &Addr, bool &UseOffset, Register &IndexReg);
    bool PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, Register DestReg,
                       bool IsZExt);
    Register PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
    Register PPCMaterializeGV(const GlobalValue *GV, MVT VT);
    Register PPCMaterializeInt(const ConstantInt *CI, MVT VT,
                               bool UseSExt = true);
    Register PPCMaterialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
    Register PPCMaterialize64BitInt(int64_t Imm, const TargetRegisterClass *RC);
    Register PPCMoveToIntReg(const Instruction *I, MVT VT, Register SrcReg,
                             bool IsSigned);
    Register PPCMoveToFPReg(MVT VT, Register SrcReg, bool IsSigned);

  // Call handling routines.
  private:
    // NOTE(review): the extraction this listing came from appears to have
    // dropped several parameter lines here (upstream LLVM also passes
    // ArgRegs/ArgFlags/RegArgs and a CallingConv::ID) — confirm against
    // the original source before relying on this signature.
    bool processCallArgs(SmallVectorImpl<Value *> &Args,
                         SmallVectorImpl<MVT> &ArgVTs,
                         unsigned &NumBytes, bool IsVarArg);
    bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);

  private:
  #include "PPCGenFastISel.inc"

};
186
187} // end anonymous namespace
188
189static std::optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
190 switch (Pred) {
191 // These are not representable with any single compare.
194 // Major concern about the following 6 cases is NaN result. The comparison
195 // result consists of 4 bits, indicating lt, eq, gt and un (unordered),
196 // only one of which will be set. The result is generated by fcmpu
197 // instruction. However, bc instruction only inspects one of the first 3
198 // bits, so when un is set, bc instruction may jump to an undesired
199 // place.
200 //
201 // More specifically, if we expect an unordered comparison and un is set, we
202 // expect to always go to true branch; in such case UEQ, UGT and ULT still
203 // give false, which are undesired; but UNE, UGE, ULE happen to give true,
204 // since they are tested by inspecting !eq, !lt, !gt, respectively.
205 //
206 // Similarly, for ordered comparison, when un is set, we always expect the
207 // result to be false. In such case OGT, OLT and OEQ is good, since they are
208 // actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
209 // and ONE are tested through !lt, !gt and !eq, and these are true.
216 default:
217 return std::nullopt;
218
220 case CmpInst::ICMP_EQ:
221 return PPC::PRED_EQ;
222
226 return PPC::PRED_GT;
227
231 return PPC::PRED_GE;
232
236 return PPC::PRED_LT;
237
241 return PPC::PRED_LE;
242
244 case CmpInst::ICMP_NE:
245 return PPC::PRED_NE;
246
248 return PPC::PRED_NU;
249
251 return PPC::PRED_UN;
252 }
253}
254
255// Determine whether the type Ty is simple enough to be handled by
256// fast-isel, and return its equivalent machine type in VT.
257// FIXME: Copied directly from ARM -- factor into base class?
258bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
259 EVT Evt = TLI.getValueType(DL, Ty, true);
260
261 // Only handle simple types.
262 if (Evt == MVT::Other || !Evt.isSimple()) return false;
263 VT = Evt.getSimpleVT();
264
265 // Handle all legal types, i.e. a register that will directly hold this
266 // value.
267 return TLI.isTypeLegal(VT);
268}
269
270// Determine whether the type Ty is simple enough to be handled by
271// fast-isel as a load target, and return its equivalent machine type in VT.
272bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
273 if (isTypeLegal(Ty, VT)) return true;
274
275 // If this is a type than can be sign or zero-extended to a basic operation
276 // go ahead and accept it now.
277 if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
278 return true;
279 }
280
281 return false;
282}
283
284bool PPCFastISel::isValueAvailable(const Value *V) const {
285 if (!isa<Instruction>(V))
286 return true;
287
288 const auto *I = cast<Instruction>(V);
289 return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
290}
291
292// Given a value Obj, create an Address object Addr that represents its
293// address. Return false if we can't handle it.
294bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
295 const User *U = nullptr;
296 unsigned Opcode = Instruction::UserOp1;
297 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
298 // Don't walk into other basic blocks unless the object is an alloca from
299 // another block, otherwise it may not have a virtual register assigned.
300 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
301 FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
302 Opcode = I->getOpcode();
303 U = I;
304 }
305 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
306 Opcode = C->getOpcode();
307 U = C;
308 }
309
310 switch (Opcode) {
311 default:
312 break;
313 case Instruction::BitCast:
314 // Look through bitcasts.
315 return PPCComputeAddress(U->getOperand(0), Addr);
316 case Instruction::IntToPtr:
317 // Look past no-op inttoptrs.
318 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
319 TLI.getPointerTy(DL))
320 return PPCComputeAddress(U->getOperand(0), Addr);
321 break;
322 case Instruction::PtrToInt:
323 // Look past no-op ptrtoints.
324 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
325 return PPCComputeAddress(U->getOperand(0), Addr);
326 break;
327 case Instruction::GetElementPtr: {
328 Address SavedAddr = Addr;
329 int64_t TmpOffset = Addr.Offset;
330
331 // Iterate through the GEP folding the constants into offsets where
332 // we can.
334 for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
335 II != IE; ++II, ++GTI) {
336 const Value *Op = *II;
337 if (StructType *STy = GTI.getStructTypeOrNull()) {
338 const StructLayout *SL = DL.getStructLayout(STy);
339 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
340 TmpOffset += SL->getElementOffset(Idx);
341 } else {
342 uint64_t S = GTI.getSequentialElementStride(DL);
343 for (;;) {
344 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
345 // Constant-offset addressing.
346 TmpOffset += CI->getSExtValue() * S;
347 break;
348 }
349 if (canFoldAddIntoGEP(U, Op)) {
350 // A compatible add with a constant operand. Fold the constant.
351 ConstantInt *CI =
352 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
353 TmpOffset += CI->getSExtValue() * S;
354 // Iterate on the other operand.
355 Op = cast<AddOperator>(Op)->getOperand(0);
356 continue;
357 }
358 // Unsupported
359 goto unsupported_gep;
360 }
361 }
362 }
363
364 // Try to grab the base operand now.
365 Addr.Offset = TmpOffset;
366 if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
367
368 // We failed, restore everything and try the other options.
369 Addr = SavedAddr;
370
371 unsupported_gep:
372 break;
373 }
374 case Instruction::Alloca: {
375 const AllocaInst *AI = cast<AllocaInst>(Obj);
376 DenseMap<const AllocaInst*, int>::iterator SI =
377 FuncInfo.StaticAllocaMap.find(AI);
378 if (SI != FuncInfo.StaticAllocaMap.end()) {
379 Addr.BaseType = Address::FrameIndexBase;
380 Addr.Base.FI = SI->second;
381 return true;
382 }
383 break;
384 }
385 }
386
387 // FIXME: References to parameters fall through to the behavior
388 // below. They should be able to reference a frame index since
389 // they are stored to the stack, so we can get "ld rx, offset(r1)"
390 // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
391 // just contain the parameter. Try to handle this with a FI.
392
393 // Try to get this in a register if nothing else has worked.
394 if (Addr.Base.Reg == 0)
395 Addr.Base.Reg = getRegForValue(Obj);
396
397 // Prevent assignment of base register to X0, which is inappropriate
398 // for loads and stores alike.
399 if (Addr.Base.Reg != 0)
400 MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
401
402 return Addr.Base.Reg != 0;
403}
404
405// Fix up some addresses that can't be used directly. For example, if
406// an offset won't fit in an instruction field, we may need to move it
407// into an index register.
408void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
409 Register &IndexReg) {
410
411 // Check whether the offset fits in the instruction field.
412 if (!isInt<16>(Addr.Offset))
413 UseOffset = false;
414
415 // If this is a stack pointer and the offset needs to be simplified then
416 // put the alloca address into a register, set the base type back to
417 // register and continue. This should almost never happen.
418 if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
419 Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
420 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDI8),
421 ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
422 Addr.Base.Reg = ResultReg;
423 Addr.BaseType = Address::RegBase;
424 }
425
426 if (!UseOffset) {
427 IntegerType *OffsetTy = Type::getInt64Ty(*Context);
428 const ConstantInt *Offset = ConstantInt::getSigned(OffsetTy, Addr.Offset);
429 IndexReg = PPCMaterializeInt(Offset, MVT::i64);
430 assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
431 }
432}
433
// Emit a load instruction if possible, returning true if we succeeded,
// otherwise false. See commentary below for how the register class of
// the load is determined.
// VT is the type being loaded; ResultReg (in/out) receives the loaded
// value and is created if zero; Addr may be simplified in place; RC is
// a preferred register class or null; IsZExt selects zero- vs
// sign-extending sub-word loads; FP64LoadOpc is the f64 opcode
// (LFD, or EVLDD on SPE subtargets).
bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                              const TargetRegisterClass *RC,
                              bool IsZExt, unsigned FP64LoadOpc) {
  unsigned Opc;
  bool UseOffset = true;
  bool HasSPE = Subtarget->hasSPE();

  // If ResultReg is given, it determines the register class of the load.
  // Otherwise, RC is the register class to use. If the result of the
  // load isn't anticipated in this block, both may be zero, in which
  // case we must make a conservative guess. In particular, don't assign
  // R0 or X0 to the result register, as the result may be used in a load,
  // store, add-immediate, or isel that won't permit this. (Though
  // perhaps the spill and reload of live-exit values would handle this?)
  const TargetRegisterClass *UseRC =
    (ResultReg ? MRI.getRegClass(ResultReg) :
     (RC ? RC :
      (VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
       (VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) :
        (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
         &PPC::GPRC_and_GPRC_NOR0RegClass)))));

  // Integer loads come in 32- and 64-bit flavors; choose by register class.
  bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);

  switch (VT.SimpleTy) {
    default: // e.g., vector types not handled
      return false;
    case MVT::i8:
      Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
      break;
    case MVT::i16:
      // LHZ zero-extends; LHA sign-extends.
      Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
                    : (Is32BitInt ? PPC::LHA : PPC::LHA8));
      break;
    case MVT::i32:
      Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
                    : (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
      // LWA is a DS-form instruction: its displacement must be 4-byte aligned.
      if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
        UseOffset = false;
      break;
    case MVT::i64:
      Opc = PPC::LD;
      assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
             "64-bit load with 32-bit target??");
      // LD is a DS-form instruction: its displacement must be 4-byte aligned.
      UseOffset = ((Addr.Offset & 3) == 0);
      break;
    case MVT::f32:
      Opc = Subtarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
      break;
    case MVT::f64:
      Opc = FP64LoadOpc;
      break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  Register IndexReg;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX load with an offset of 0, a VSX indexed load can
  // be used.
  bool IsVSSRC = isVSSRCRegClass(UseRC);
  bool IsVSFRC = isVSFRCRegClass(UseRC);
  bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
  bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
  if ((Is32VSXLoad || Is64VSXLoad) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  if (!ResultReg)
    ResultReg = createResultReg(UseRC);

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed load.
    if (Is32VSXLoad || Is64VSXLoad) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
                                          Addr.Offset),
        MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlign(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed load.
    if (Is32VSXLoad || Is64VSXLoad) return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default: llvm_unreachable("Unexpected opcode!");
      case PPC::LBZ: Opc = PPC::LBZX; break;
      case PPC::LBZ8: Opc = PPC::LBZX8; break;
      case PPC::LHZ: Opc = PPC::LHZX; break;
      case PPC::LHZ8: Opc = PPC::LHZX8; break;
      case PPC::LHA: Opc = PPC::LHAX; break;
      case PPC::LHA8: Opc = PPC::LHAX8; break;
      case PPC::LWZ: Opc = PPC::LWZX; break;
      case PPC::LWZ8: Opc = PPC::LWZX8; break;
      case PPC::LWA: Opc = PPC::LWAX; break;
      case PPC::LWA_32: Opc = PPC::LWAX_32; break;
      case PPC::LD: Opc = PPC::LDX; break;
      case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
      case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
      case PPC::EVLDD: Opc = PPC::EVLDDX; break;
      case PPC::SPELWZ: Opc = PPC::SPELWZX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
                       ResultReg);

    // If we have an index register defined we use it in the store inst,
    // otherwise we use X0 as base as it makes the vector instructions to
    // use zero in the computation of the effective address regardless the
    // content of the register.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}
574
575// Attempt to fast-select a load instruction.
576bool PPCFastISel::SelectLoad(const Instruction *I) {
577 // FIXME: No atomic loads are supported.
578 if (cast<LoadInst>(I)->isAtomic())
579 return false;
580
581 // Verify we have a legal type before going any further.
582 MVT VT;
583 if (!isLoadTypeLegal(I->getType(), VT))
584 return false;
585
586 // See if we can handle this address.
587 Address Addr;
588 if (!PPCComputeAddress(I->getOperand(0), Addr))
589 return false;
590
591 // Look at the currently assigned register for this instruction
592 // to determine the required register class. This is necessary
593 // to constrain RA from using R0/X0 when this is not legal.
594 Register AssignedReg = FuncInfo.ValueMap[I];
595 const TargetRegisterClass *RC =
596 AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
597
598 Register ResultReg = 0;
599 if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true,
600 Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
601 return false;
602 updateValueMap(I, ResultReg);
603 return true;
604}
605
// Emit a store instruction to store SrcReg at Addr.
// Returns true on success; false when the value type or addressing
// form cannot be handled. Addr may be simplified in place.
bool PPCFastISel::PPCEmitStore(MVT VT, Register SrcReg, Address &Addr) {
  assert(SrcReg && "Nothing to store!");
  unsigned Opc;
  bool UseOffset = true;

  // The source register's class tells us whether this is a 32- or
  // 64-bit integer (or an FP/SPE value).
  const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
  bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  switch (VT.SimpleTy) {
    default: // e.g., vector types not handled
      return false;
    case MVT::i8:
      Opc = Is32BitInt ? PPC::STB : PPC::STB8;
      break;
    case MVT::i16:
      Opc = Is32BitInt ? PPC::STH : PPC::STH8;
      break;
    case MVT::i32:
      assert(Is32BitInt && "Not GPRC for i32??");
      Opc = PPC::STW;
      break;
    case MVT::i64:
      Opc = PPC::STD;
      // STD is a DS-form instruction: its displacement must be 4-byte aligned.
      UseOffset = ((Addr.Offset & 3) == 0);
      break;
    case MVT::f32:
      Opc = Subtarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
      break;
    case MVT::f64:
      Opc = Subtarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
      break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  Register IndexReg;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX store with an offset of 0, a VSX indexed store
  // can be used.
  bool IsVSSRC = isVSSRCRegClass(RC);
  bool IsVSFRC = isVSFRCRegClass(RC);
  bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
  bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
  if ((Is32VSXStore || Is64VSXStore) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
                                          Addr.Offset),
        MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlign(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
        .addReg(SrcReg)
        .addImm(Addr.Offset)
        .addFrameIndex(Addr.Base.FI)
        .addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore)
      return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
      .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default: llvm_unreachable("Unexpected opcode!");
      case PPC::STB: Opc = PPC::STBX; break;
      case PPC::STH : Opc = PPC::STHX; break;
      case PPC::STW : Opc = PPC::STWX; break;
      case PPC::STB8: Opc = PPC::STBX8; break;
      case PPC::STH8: Opc = PPC::STHX8; break;
      case PPC::STW8: Opc = PPC::STWX8; break;
      case PPC::STD: Opc = PPC::STDX; break;
      case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
      case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
      case PPC::EVSTDD: Opc = PPC::EVSTDDX; break;
      case PPC::SPESTW: Opc = PPC::SPESTWX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
        .addReg(SrcReg);

    // If we have an index register defined we use it in the store inst,
    // otherwise we use X0 as base as it makes the vector instructions to
    // use zero in the computation of the effective address regardless the
    // content of the register.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}
720
721// Attempt to fast-select a store instruction.
722bool PPCFastISel::SelectStore(const Instruction *I) {
723 Value *Op0 = I->getOperand(0);
724 Register SrcReg;
725
726 // FIXME: No atomics loads are supported.
727 if (cast<StoreInst>(I)->isAtomic())
728 return false;
729
730 // Verify we have a legal type before going any further.
731 MVT VT;
732 if (!isLoadTypeLegal(Op0->getType(), VT))
733 return false;
734
735 // Get the value to be stored into a register.
736 SrcReg = getRegForValue(Op0);
737 if (!SrcReg)
738 return false;
739
740 // See if we can handle this address.
741 Address Addr;
742 if (!PPCComputeAddress(I->getOperand(1), Addr))
743 return false;
744
745 if (!PPCEmitStore(VT, SrcReg, Addr))
746 return false;
747
748 return true;
749}
750
751// Attempt to fast-select a branch instruction.
752bool PPCFastISel::SelectBranch(const Instruction *I) {
753 const BranchInst *BI = cast<BranchInst>(I);
754 MachineBasicBlock *BrBB = FuncInfo.MBB;
755 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
756 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
757
758 // For now, just try the simplest case where it's fed by a compare.
759 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
760 if (isValueAvailable(CI)) {
761 std::optional<PPC::Predicate> OptPPCPred =
762 getComparePred(CI->getPredicate());
763 if (!OptPPCPred)
764 return false;
765
766 PPC::Predicate PPCPred = *OptPPCPred;
767
768 // Take advantage of fall-through opportunities.
769 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
770 std::swap(TBB, FBB);
771 PPCPred = PPC::InvertPredicate(PPCPred);
772 }
773
774 Register CondReg = createResultReg(&PPC::CRRCRegClass);
775
776 if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
777 CondReg, PPCPred))
778 return false;
779
780 BuildMI(*BrBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::BCC))
781 .addImm(Subtarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
782 .addReg(CondReg)
783 .addMBB(TBB);
784 finishCondBranch(BI->getParent(), TBB, FBB);
785 return true;
786 }
787 } else if (const ConstantInt *CI =
789 uint64_t Imm = CI->getZExtValue();
790 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
791 fastEmitBranch(Target, MIMD.getDL());
792 return true;
793 }
794
795 // FIXME: ARM looks for a case where the block containing the compare
796 // has been split from the block containing the branch. If this happens,
797 // there is a vreg available containing the result of the compare. I'm
798 // not sure we can do much, as we've lost the predicate information with
799 // the compare instruction -- we have a 4-bit CR but don't know which bit
800 // to test here.
801 return false;
802}
803
// Attempt to emit a compare of the two source values. Signed and unsigned
// comparisons are supported. Return false if we can't handle it.
// IsZExt selects unsigned (zero-extended) vs signed comparison for
// integers; DestReg receives the CR result; Pred is only consulted for
// SPE FP compares, which have per-predicate opcodes.
bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
                             bool IsZExt, Register DestReg,
                             const PPC::Predicate Pred) {
  Type *Ty = SrcValue1->getType();
  EVT SrcEVT = TLI.getValueType(DL, Ty, true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  // i1 values live in CR bits when useCRBits is on; not handled here.
  if (SrcVT == MVT::i1 && Subtarget->useCRBits())
    return false;

  // See if operand 2 is an immediate encodeable in the compare.
  // FIXME: Operands are not in canonical order at -O0, so an immediate
  // operand in position 1 is a lost opportunity for now. We are
  // similar to ARM in this regard.
  int64_t Imm = 0;
  bool UseImm = false;
  const bool HasSPE = Subtarget->hasSPE();

  // Only 16-bit integer constants can be represented in compares for
  // PowerPC. Others will be materialized into a register.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (IsZExt) ? (int64_t)CIVal.getZExtValue() :
                       (int64_t)CIVal.getSExtValue();
      if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
        UseImm = true;
    }
  }

  Register SrcReg1 = getRegForValue(SrcValue1);
  if (!SrcReg1)
    return false;

  Register SrcReg2;
  if (!UseImm) {
    SrcReg2 = getRegForValue(SrcValue2);
    if (!SrcReg2)
      return false;
  }

  unsigned CmpOpc;
  bool NeedsExt = false;

  auto RC1 = MRI.getRegClass(SrcReg1);
  auto RC2 = SrcReg2 != 0 ? MRI.getRegClass(SrcReg2) : nullptr;

  switch (SrcVT.SimpleTy) {
    default: return false;
    case MVT::f32:
      // SPE provides predicate-specific FP compares (EQ/LT/GT only);
      // otherwise use the unordered FP compare, first copying VSX
      // single-precision operands into the classic FP register class.
      if (HasSPE) {
        switch (Pred) {
          default: return false;
          case PPC::PRED_EQ:
            CmpOpc = PPC::EFSCMPEQ;
            break;
          case PPC::PRED_LT:
            CmpOpc = PPC::EFSCMPLT;
            break;
          case PPC::PRED_GT:
            CmpOpc = PPC::EFSCMPGT;
            break;
        }
      } else {
        CmpOpc = PPC::FCMPUS;
        if (isVSSRCRegClass(RC1))
          SrcReg1 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg1);
        if (RC2 && isVSSRCRegClass(RC2))
          SrcReg2 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg2);
      }
      break;
    case MVT::f64:
      if (HasSPE) {
        switch (Pred) {
          default: return false;
          case PPC::PRED_EQ:
            CmpOpc = PPC::EFDCMPEQ;
            break;
          case PPC::PRED_LT:
            CmpOpc = PPC::EFDCMPLT;
            break;
          case PPC::PRED_GT:
            CmpOpc = PPC::EFDCMPGT;
            break;
        }
      } else if (isVSFRCRegClass(RC1) || (RC2 && isVSFRCRegClass(RC2))) {
        CmpOpc = PPC::XSCMPUDP;
      } else {
        CmpOpc = PPC::FCMPUD;
      }
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      // Sub-word operands must be extended to 32 bits before comparing.
      NeedsExt = true;
      [[fallthrough]];
    case MVT::i32:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
      else
        CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
      break;
    case MVT::i64:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
      else
        CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
      break;
  }

  // Widen sub-word integer operands to i32 with the matching extension.
  if (NeedsExt) {
    Register ExtReg = createResultReg(&PPC::GPRCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
      return false;
    SrcReg1 = ExtReg;

    if (!UseImm) {
      Register ExtReg = createResultReg(&PPC::GPRCRegClass);
      if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
        return false;
      SrcReg2 = ExtReg;
    }
  }

  if (!UseImm)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addReg(SrcReg2);
  else
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addImm(Imm);

  return true;
}
942
943// Attempt to fast-select a floating-point extend instruction.
944bool PPCFastISel::SelectFPExt(const Instruction *I) {
945 Value *Src = I->getOperand(0);
946 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
947 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
948
949 if (SrcVT != MVT::f32 || DestVT != MVT::f64)
950 return false;
951
952 Register SrcReg = getRegForValue(Src);
953 if (!SrcReg)
954 return false;
955
956 // No code is generated for a FP extend.
957 updateValueMap(I, SrcReg);
958 return true;
959}
960
961// Attempt to fast-select a floating-point truncate instruction.
962bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
963 Value *Src = I->getOperand(0);
964 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
965 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
966
967 if (SrcVT != MVT::f64 || DestVT != MVT::f32)
968 return false;
969
970 Register SrcReg = getRegForValue(Src);
971 if (!SrcReg)
972 return false;
973
974 // Round the result to single precision.
975 Register DestReg;
976 auto RC = MRI.getRegClass(SrcReg);
977 if (Subtarget->hasSPE()) {
978 DestReg = createResultReg(&PPC::GPRCRegClass);
979 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::EFSCFD),
980 DestReg)
981 .addReg(SrcReg);
982 } else if (Subtarget->hasP8Vector() && isVSFRCRegClass(RC)) {
983 DestReg = createResultReg(&PPC::VSSRCRegClass);
984 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::XSRSP),
985 DestReg)
986 .addReg(SrcReg);
987 } else {
988 SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
989 DestReg = createResultReg(&PPC::F4RCRegClass);
990 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
991 TII.get(PPC::FRSP), DestReg)
992 .addReg(SrcReg);
993 }
994
995 updateValueMap(I, DestReg);
996 return true;
997}
998
999// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
1000// FIXME: When direct register moves are implemented (see PowerISA 2.07),
1001// those should be used instead of moving via a stack slot when the
1002// subtarget permits.
1003// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
1004// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
1005// case to 8 bytes which produces tighter code but wastes stack space.
1006Register PPCFastISel::PPCMoveToFPReg(MVT SrcVT, Register SrcReg,
1007 bool IsSigned) {
1008
1009 // If necessary, extend 32-bit int to 64-bit.
1010 if (SrcVT == MVT::i32) {
1011 Register TmpReg = createResultReg(&PPC::G8RCRegClass);
1012 if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
1013 return Register();
1014 SrcReg = TmpReg;
1015 }
1016
1017 // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
1018 Address Addr;
1019 Addr.BaseType = Address::FrameIndexBase;
1020 Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);
1021
1022 // Store the value from the GPR.
1023 if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
1024 return Register();
1025
1026 // Load the integer value into an FPR. The kind of load used depends
1027 // on a number of conditions.
1028 unsigned LoadOpc = PPC::LFD;
1029
1030 if (SrcVT == MVT::i32) {
1031 if (!IsSigned) {
1032 LoadOpc = PPC::LFIWZX;
1033 Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
1034 } else if (Subtarget->hasLFIWAX()) {
1035 LoadOpc = PPC::LFIWAX;
1036 Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
1037 }
1038 }
1039
1040 const TargetRegisterClass *RC = &PPC::F8RCRegClass;
1041 Register ResultReg;
1042 if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
1043 return Register();
1044
1045 return ResultReg;
1046}
1047
1048// Attempt to fast-select an integer-to-floating-point conversion.
1049// FIXME: Once fast-isel has better support for VSX, conversions using
1050// direct moves should be implemented.
1051bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
1052 MVT DstVT;
1053 Type *DstTy = I->getType();
1054 if (!isTypeLegal(DstTy, DstVT))
1055 return false;
1056
1057 if (DstVT != MVT::f32 && DstVT != MVT::f64)
1058 return false;
1059
1060 Value *Src = I->getOperand(0);
1061 EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
1062 if (!SrcEVT.isSimple())
1063 return false;
1064
1065 MVT SrcVT = SrcEVT.getSimpleVT();
1066
1067 if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
1068 SrcVT != MVT::i32 && SrcVT != MVT::i64)
1069 return false;
1070
1071 Register SrcReg = getRegForValue(Src);
1072 if (!SrcReg)
1073 return false;
1074
1075 // Shortcut for SPE. Doesn't need to store/load, since it's all in the GPRs
1076 if (Subtarget->hasSPE()) {
1077 unsigned Opc;
1078 if (DstVT == MVT::f32)
1079 Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
1080 else
1081 Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI;
1082
1083 Register DestReg = createResultReg(&PPC::SPERCRegClass);
1084 // Generate the convert.
1085 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
1086 .addReg(SrcReg);
1087 updateValueMap(I, DestReg);
1088 return true;
1089 }
1090
1091 // We can only lower an unsigned convert if we have the newer
1092 // floating-point conversion operations.
1093 if (!IsSigned && !Subtarget->hasFPCVT())
1094 return false;
1095
1096 // FIXME: For now we require the newer floating-point conversion operations
1097 // (which are present only on P7 and A2 server models) when converting
1098 // to single-precision float. Otherwise we have to generate a lot of
1099 // fiddly code to avoid double rounding. If necessary, the fiddly code
1100 // can be found in PPCTargetLowering::LowerINT_TO_FP().
1101 if (DstVT == MVT::f32 && !Subtarget->hasFPCVT())
1102 return false;
1103
1104 // Extend the input if necessary.
1105 if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
1106 Register TmpReg = createResultReg(&PPC::G8RCRegClass);
1107 if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
1108 return false;
1109 SrcVT = MVT::i64;
1110 SrcReg = TmpReg;
1111 }
1112
1113 // Move the integer value to an FPR.
1114 Register FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
1115 if (!FPReg)
1116 return false;
1117
1118 // Determine the opcode for the conversion.
1119 const TargetRegisterClass *RC = &PPC::F8RCRegClass;
1120 Register DestReg = createResultReg(RC);
1121 unsigned Opc;
1122
1123 if (DstVT == MVT::f32)
1124 Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
1125 else
1126 Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;
1127
1128 // Generate the convert.
1129 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
1130 .addReg(FPReg);
1131
1132 updateValueMap(I, DestReg);
1133 return true;
1134}
1135
1136// Move the floating-point value in SrcReg into an integer destination
1137// register, and return the register (or zero if we can't handle it).
1138// FIXME: When direct register moves are implemented (see PowerISA 2.07),
1139// those should be used instead of moving via a stack slot when the
1140// subtarget permits.
1141Register PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
1142 Register SrcReg, bool IsSigned) {
1143 // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
1144 // Note that if have STFIWX available, we could use a 4-byte stack
1145 // slot for i32, but this being fast-isel we'll just go with the
1146 // easiest code gen possible.
1147 Address Addr;
1148 Addr.BaseType = Address::FrameIndexBase;
1149 Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);
1150
1151 // Store the value from the FPR.
1152 if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
1153 return Register();
1154
1155 // Reload it into a GPR. If we want an i32 on big endian, modify the
1156 // address to have a 4-byte offset so we load from the right place.
1157 if (VT == MVT::i32)
1158 Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
1159
1160 // Look at the currently assigned register for this instruction
1161 // to determine the required register class.
1162 Register AssignedReg = FuncInfo.ValueMap[I];
1163 const TargetRegisterClass *RC =
1164 AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
1165
1166 Register ResultReg;
1167 if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
1168 return Register();
1169
1170 return ResultReg;
1171}
1172
1173// Attempt to fast-select a floating-point-to-integer conversion.
1174// FIXME: Once fast-isel has better support for VSX, conversions using
1175// direct moves should be implemented.
1176bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
1177 MVT DstVT, SrcVT;
1178 Type *DstTy = I->getType();
1179 if (!isTypeLegal(DstTy, DstVT))
1180 return false;
1181
1182 if (DstVT != MVT::i32 && DstVT != MVT::i64)
1183 return false;
1184
1185 // If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
1186 if (DstVT == MVT::i64 && !IsSigned && !Subtarget->hasFPCVT() &&
1187 !Subtarget->hasSPE())
1188 return false;
1189
1190 Value *Src = I->getOperand(0);
1191 Type *SrcTy = Src->getType();
1192 if (!isTypeLegal(SrcTy, SrcVT))
1193 return false;
1194
1195 if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
1196 return false;
1197
1198 Register SrcReg = getRegForValue(Src);
1199 if (!SrcReg)
1200 return false;
1201
1202 // Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
1203 // meaningless copy to get the register class right.
1204 const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
1205 if (InRC == &PPC::F4RCRegClass)
1206 SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
1207 else if (InRC == &PPC::VSSRCRegClass)
1208 SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg);
1209
1210 // Determine the opcode for the conversion, which takes place
1211 // entirely within FPRs or VSRs.
1212 Register DestReg;
1213 unsigned Opc;
1214 auto RC = MRI.getRegClass(SrcReg);
1215
1216 if (Subtarget->hasSPE()) {
1217 DestReg = createResultReg(&PPC::GPRCRegClass);
1218 if (IsSigned)
1219 Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
1220 else
1221 Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
1222 } else if (isVSFRCRegClass(RC)) {
1223 DestReg = createResultReg(&PPC::VSFRCRegClass);
1224 if (DstVT == MVT::i32)
1225 Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
1226 else
1227 Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
1228 } else {
1229 DestReg = createResultReg(&PPC::F8RCRegClass);
1230 if (DstVT == MVT::i32)
1231 if (IsSigned)
1232 Opc = PPC::FCTIWZ;
1233 else
1234 Opc = Subtarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
1235 else
1236 Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
1237 }
1238
1239 // Generate the convert.
1240 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
1241 .addReg(SrcReg);
1242
1243 // Now move the integer value from a float register to an integer register.
1244 Register IntReg = Subtarget->hasSPE()
1245 ? DestReg
1246 : PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
1247
1248 if (!IntReg)
1249 return false;
1250
1251 updateValueMap(I, IntReg);
1252 return true;
1253}
1254
// Attempt to fast-select a binary integer operation that isn't already
// handled automatically.
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. If there is no register,
  // make a conservative choice (don't assign R0).
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    (AssignedReg ? MRI.getRegClass(AssignedReg) :
     &PPC::GPRC_and_GPRC_NOR0RegClass);
  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  // Pick the reg-reg opcode first; it may be replaced by an immediate
  // (D-form) opcode below when operand 1 is a small constant.
  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
      break;
    case ISD::OR:
      Opc = IsGPRC ? PPC::OR : PPC::OR8;
      break;
    case ISD::SUB:
      Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
      break;
  }

  Register ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
  Register SrcReg1 = getRegForValue(I->getOperand(0));
  if (!SrcReg1)
    return false;

  // Handle case of small immediate operand.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
    const APInt &CIVal = ConstInt->getValue();
    int Imm = (int)CIVal.getSExtValue();
    bool UseImm = true;
    // Only 16-bit signed immediates fit the D-form instructions.
    if (isInt<16>(Imm)) {
      switch (Opc) {
        default:
          llvm_unreachable("Missing case!");
        case PPC::ADD4:
          Opc = PPC::ADDI;
          // ADDI reads R0 as the constant zero, so keep the source out of R0.
          MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
          break;
        case PPC::ADD8:
          Opc = PPC::ADDI8;
          MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
          break;
        case PPC::OR:
          Opc = PPC::ORI;
          break;
        case PPC::OR8:
          Opc = PPC::ORI8;
          break;
        case PPC::SUBF:
          // Subtract-from becomes an add of the negated immediate, except
          // that -32768 cannot be negated within 16 bits, so fall back to
          // the reg-reg form in that case.
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI;
            MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
            Imm = -Imm;
          }
          break;
        case PPC::SUBF8:
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI8;
            MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
            Imm = -Imm;
          }
          break;
      }

      if (UseImm) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
                ResultReg)
            .addReg(SrcReg1)
            .addImm(Imm);
        updateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  // Reg-reg case.
  Register SrcReg2 = getRegForValue(I->getOperand(1));
  if (!SrcReg2)
    return false;

  // Reverse operands for subtract-from.
  if (ISDOpcode == ISD::SUB)
    std::swap(SrcReg1, SrcReg2);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
    .addReg(SrcReg1).addReg(SrcReg2);
  updateValueMap(I, ResultReg);
  return true;
}
1361
1362// Handle arguments to a call that we're attempting to fast-select.
1363// Return false if the arguments are too complex for us at the moment.
1364bool PPCFastISel::processCallArgs(SmallVectorImpl<Value *> &Args,
1365 SmallVectorImpl<Register> &ArgRegs,
1366 SmallVectorImpl<MVT> &ArgVTs,
1367 SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1368 SmallVectorImpl<unsigned> &RegArgs,
1369 CallingConv::ID CC, unsigned &NumBytes,
1370 bool IsVarArg) {
1372 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
1373
1374 // Reserve space for the linkage area on the stack.
1375 unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize();
1376 CCInfo.AllocateStack(LinkageSize, Align(8));
1377
1379 for (Value *Arg : Args)
1380 ArgTys.push_back(Arg->getType());
1381 CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, ArgTys, CC_PPC64_ELF_FIS);
1382
1383 // Bail out if we can't handle any of the arguments.
1384 for (const CCValAssign &VA : ArgLocs) {
1385 MVT ArgVT = ArgVTs[VA.getValNo()];
1386
1387 // Skip vector arguments for now, as well as long double and
1388 // uint128_t, and anything that isn't passed in a register.
1389 if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
1390 !VA.isRegLoc() || VA.needsCustom())
1391 return false;
1392
1393 // Skip bit-converted arguments for now.
1394 if (VA.getLocInfo() == CCValAssign::BCvt)
1395 return false;
1396 }
1397
1398 // Get a count of how many bytes are to be pushed onto the stack.
1399 NumBytes = CCInfo.getStackSize();
1400
1401 // The prolog code of the callee may store up to 8 GPR argument registers to
1402 // the stack, allowing va_start to index over them in memory if its varargs.
1403 // Because we cannot tell if this is needed on the caller side, we have to
1404 // conservatively assume that it is needed. As such, make sure we have at
1405 // least enough stack space for the caller to store the 8 GPRs.
1406 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
1407 NumBytes = std::max(NumBytes, LinkageSize + 64);
1408
1409 // Issue CALLSEQ_START.
1410 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1411 TII.get(TII.getCallFrameSetupOpcode()))
1412 .addImm(NumBytes).addImm(0);
1413
1414 // Prepare to assign register arguments. Every argument uses up a
1415 // GPR protocol register even if it's passed in a floating-point
1416 // register (unless we're using the fast calling convention).
1417 unsigned NextGPR = PPC::X3;
1418 unsigned NextFPR = PPC::F1;
1419
1420 // Process arguments.
1421 for (const CCValAssign &VA : ArgLocs) {
1422 Register Arg = ArgRegs[VA.getValNo()];
1423 MVT ArgVT = ArgVTs[VA.getValNo()];
1424
1425 // Handle argument promotion and bitcasts.
1426 switch (VA.getLocInfo()) {
1427 default:
1428 llvm_unreachable("Unknown loc info!");
1429 case CCValAssign::Full:
1430 break;
1431 case CCValAssign::SExt: {
1432 MVT DestVT = VA.getLocVT();
1433 const TargetRegisterClass *RC =
1434 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1435 Register TmpReg = createResultReg(RC);
1436 if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
1437 llvm_unreachable("Failed to emit a sext!");
1438 ArgVT = DestVT;
1439 Arg = TmpReg;
1440 break;
1441 }
1442 case CCValAssign::AExt:
1443 case CCValAssign::ZExt: {
1444 MVT DestVT = VA.getLocVT();
1445 const TargetRegisterClass *RC =
1446 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1447 Register TmpReg = createResultReg(RC);
1448 if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
1449 llvm_unreachable("Failed to emit a zext!");
1450 ArgVT = DestVT;
1451 Arg = TmpReg;
1452 break;
1453 }
1454 case CCValAssign::BCvt: {
1455 // FIXME: Not yet handled.
1456 llvm_unreachable("Should have bailed before getting here!");
1457 break;
1458 }
1459 }
1460
1461 // Copy this argument to the appropriate register.
1462 unsigned ArgReg;
1463 if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
1464 ArgReg = NextFPR++;
1465 if (CC != CallingConv::Fast)
1466 ++NextGPR;
1467 } else
1468 ArgReg = NextGPR++;
1469
1470 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1471 TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
1472 RegArgs.push_back(ArgReg);
1473 }
1474
1475 return true;
1476}
1477
1478// For a call that we've determined we can fast-select, finish the
1479// call sequence and generate a copy to obtain the return value (if any).
1480bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
1481 CallingConv::ID CC = CLI.CallConv;
1482
1483 // Issue CallSEQ_END.
1484 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1485 TII.get(TII.getCallFrameDestroyOpcode()))
1486 .addImm(NumBytes).addImm(0);
1487
1488 // Next, generate a copy to obtain the return value.
1489 // FIXME: No multi-register return values yet, though I don't foresee
1490 // any real difficulties there.
1491 if (RetVT != MVT::isVoid) {
1493 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
1494 CCInfo.AnalyzeCallResult(RetVT, CLI.RetTy, RetCC_PPC64_ELF_FIS);
1495 CCValAssign &VA = RVLocs[0];
1496 assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
1497 assert(VA.isRegLoc() && "Can only return in registers!");
1498
1499 MVT DestVT = VA.getValVT();
1500 MVT CopyVT = DestVT;
1501
1502 // Ints smaller than a register still arrive in a full 64-bit
1503 // register, so make sure we recognize this.
1504 if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
1505 CopyVT = MVT::i64;
1506
1507 Register SourcePhysReg = VA.getLocReg();
1508 Register ResultReg;
1509
1510 if (RetVT == CopyVT) {
1511 const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
1512 ResultReg = copyRegToRegClass(CpyRC, SourcePhysReg);
1513
1514 // If necessary, round the floating result to single precision.
1515 } else if (CopyVT == MVT::f64) {
1516 ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
1517 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::FRSP),
1518 ResultReg).addReg(SourcePhysReg);
1519
1520 // If only the low half of a general register is needed, generate
1521 // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
1522 // used along the fast-isel path (not lowered), and downstream logic
1523 // also doesn't like a direct subreg copy on a physical reg.)
1524 } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
1525 // Convert physical register from G8RC to GPRC.
1526 SourcePhysReg = (SourcePhysReg - PPC::X0) + PPC::R0;
1527 ResultReg = copyRegToRegClass(&PPC::GPRCRegClass, SourcePhysReg);
1528 }
1529
1530 assert(ResultReg && "ResultReg unset!");
1531 CLI.InRegs.push_back(SourcePhysReg);
1532 CLI.ResultReg = ResultReg;
1533 CLI.NumResultRegs = 1;
1534 }
1535
1536 return true;
1537}
1538
1539bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
1540 CallingConv::ID CC = CLI.CallConv;
1541 bool IsTailCall = CLI.IsTailCall;
1542 bool IsVarArg = CLI.IsVarArg;
1543 const Value *Callee = CLI.Callee;
1544 const MCSymbol *Symbol = CLI.Symbol;
1545
1546 if (!Callee && !Symbol)
1547 return false;
1548
1549 // Allow SelectionDAG isel to handle tail calls and long calls.
1550 if (IsTailCall || Subtarget->useLongCalls())
1551 return false;
1552
1553 // Let SDISel handle vararg functions.
1554 if (IsVarArg)
1555 return false;
1556
1557 // If this is a PC-Rel function, let SDISel handle the call.
1558 if (Subtarget->isUsingPCRelativeCalls())
1559 return false;
1560
1561 // Handle simple calls for now, with legal return types and
1562 // those that can be extended.
1563 Type *RetTy = CLI.RetTy;
1564 MVT RetVT;
1565 if (RetTy->isVoidTy())
1566 RetVT = MVT::isVoid;
1567 else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
1568 RetVT != MVT::i8)
1569 return false;
1570 else if (RetVT == MVT::i1 && Subtarget->useCRBits())
1571 // We can't handle boolean returns when CR bits are in use.
1572 return false;
1573
1574 // FIXME: No multi-register return values yet.
1575 if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
1576 RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
1577 RetVT != MVT::f64) {
1579 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
1580 CCInfo.AnalyzeCallResult(RetVT, RetTy, RetCC_PPC64_ELF_FIS);
1581 if (RVLocs.size() > 1)
1582 return false;
1583 }
1584
1585 // Bail early if more than 8 arguments, as we only currently
1586 // handle arguments passed in registers.
1587 unsigned NumArgs = CLI.OutVals.size();
1588 if (NumArgs > 8)
1589 return false;
1590
1591 // Set up the argument vectors.
1592 SmallVector<Value*, 8> Args;
1594 SmallVector<MVT, 8> ArgVTs;
1596
1597 Args.reserve(NumArgs);
1598 ArgRegs.reserve(NumArgs);
1599 ArgVTs.reserve(NumArgs);
1600 ArgFlags.reserve(NumArgs);
1601
1602 for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
1603 // Only handle easy calls for now. It would be reasonably easy
1604 // to handle <= 8-byte structures passed ByVal in registers, but we
1605 // have to ensure they are right-justified in the register.
1606 ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
1607 if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
1608 return false;
1609
1610 Value *ArgValue = CLI.OutVals[i];
1611 Type *ArgTy = ArgValue->getType();
1612 MVT ArgVT;
1613 if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
1614 return false;
1615
1616 // FIXME: FastISel cannot handle non-simple types yet, including 128-bit FP
1617 // types, which is passed through vector register. Skip these types and
1618 // fallback to default SelectionDAG based selection.
1619 if (ArgVT.isVector() || ArgVT == MVT::f128)
1620 return false;
1621
1622 Register Arg = getRegForValue(ArgValue);
1623 if (!Arg)
1624 return false;
1625
1626 Args.push_back(ArgValue);
1627 ArgRegs.push_back(Arg);
1628 ArgVTs.push_back(ArgVT);
1629 ArgFlags.push_back(Flags);
1630 }
1631
1632 // Process the arguments.
1633 SmallVector<unsigned, 8> RegArgs;
1634 unsigned NumBytes;
1635
1636 if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
1637 RegArgs, CC, NumBytes, IsVarArg))
1638 return false;
1639
1640 MachineInstrBuilder MIB;
1641 // FIXME: No handling for function pointers yet. This requires
1642 // implementing the function descriptor (OPD) setup.
1643 const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
1644 if (!GV) {
1645 // patchpoints are a special case; they always dispatch to a pointer value.
1646 // However, we don't actually want to generate the indirect call sequence
1647 // here (that will be generated, as necessary, during asm printing), and
1648 // the call we generate here will be erased by FastISel::selectPatchpoint,
1649 // so don't try very hard...
1650 if (CLI.IsPatchPoint)
1651 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::NOP));
1652 else
1653 return false;
1654 } else {
1655 // Build direct call with NOP for TOC restore.
1656 // FIXME: We can and should optimize away the NOP for local calls.
1657 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1658 TII.get(PPC::BL8_NOP));
1659 // Add callee.
1660 MIB.addGlobalAddress(GV);
1661 }
1662
1663 // Add implicit physical register uses to the call.
1664 for (unsigned Reg : RegArgs)
1666
1667 // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
1668 // into the call.
1669 PPCFuncInfo->setUsesTOCBasePtr();
1670 MIB.addReg(PPC::X2, RegState::Implicit);
1671
1672 // Add a register mask with the call-preserved registers. Proper
1673 // defs for return values will be added by setPhysRegsDeadExcept().
1674 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
1675
1676 CLI.Call = MIB;
1677
1678 // Finish off the call including any return values.
1679 return finishCall(RetVT, CLI, NumBytes);
1680}
1681
1682// Attempt to fast-select a return instruction.
1683bool PPCFastISel::SelectRet(const Instruction *I) {
1684
1685 if (!FuncInfo.CanLowerReturn)
1686 return false;
1687
1688 const ReturnInst *Ret = cast<ReturnInst>(I);
1689 const Function &F = *I->getParent()->getParent();
1690
1691 // Build a list of return value registers.
1693 CallingConv::ID CC = F.getCallingConv();
1694
1695 if (Ret->getNumOperands() > 0) {
1697 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1698
1699 // Analyze operands of the call, assigning locations to each operand.
1701 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
1702 CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
1703 const Value *RV = Ret->getOperand(0);
1704
1705 // FIXME: Only one output register for now.
1706 if (ValLocs.size() > 1)
1707 return false;
1708
1709 // Special case for returning a constant integer of any size - materialize
1710 // the constant as an i64 and copy it to the return register.
1711 if (isa<ConstantInt>(RV) && RV->getType()->isIntegerTy()) {
1712 const ConstantInt *CI = cast<ConstantInt>(RV);
1713 CCValAssign &VA = ValLocs[0];
1714
1715 Register RetReg = VA.getLocReg();
1716 // We still need to worry about properly extending the sign. For example,
1717 // we could have only a single bit or a constant that needs zero
1718 // extension rather than sign extension. Make sure we pass the return
1719 // value extension property to integer materialization.
1720 Register SrcReg =
1721 PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt);
1722
1723 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1724 TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
1725
1726 RetRegs.push_back(RetReg);
1727
1728 } else {
1729 Register Reg = getRegForValue(RV);
1730
1731 if (!Reg)
1732 return false;
1733
1734 // Copy the result values into the output registers.
1735 for (unsigned i = 0; i < ValLocs.size(); ++i) {
1736
1737 CCValAssign &VA = ValLocs[i];
1738 assert(VA.isRegLoc() && "Can only return in registers!");
1739 RetRegs.push_back(VA.getLocReg());
1740 Register SrcReg = Reg + VA.getValNo();
1741
1742 EVT RVEVT = TLI.getValueType(DL, RV->getType());
1743 if (!RVEVT.isSimple())
1744 return false;
1745 MVT RVVT = RVEVT.getSimpleVT();
1746 MVT DestVT = VA.getLocVT();
1747
1748 if (RVVT != DestVT && RVVT != MVT::i8 &&
1749 RVVT != MVT::i16 && RVVT != MVT::i32)
1750 return false;
1751
1752 if (RVVT != DestVT) {
1753 switch (VA.getLocInfo()) {
1754 default:
1755 llvm_unreachable("Unknown loc info!");
1756 case CCValAssign::Full:
1757 llvm_unreachable("Full value assign but types don't match?");
1758 case CCValAssign::AExt:
1759 case CCValAssign::ZExt: {
1760 const TargetRegisterClass *RC =
1761 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1762 Register TmpReg = createResultReg(RC);
1763 if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
1764 return false;
1765 SrcReg = TmpReg;
1766 break;
1767 }
1768 case CCValAssign::SExt: {
1769 const TargetRegisterClass *RC =
1770 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1771 Register TmpReg = createResultReg(RC);
1772 if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
1773 return false;
1774 SrcReg = TmpReg;
1775 break;
1776 }
1777 }
1778 }
1779
1780 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1781 TII.get(TargetOpcode::COPY), RetRegs[i])
1782 .addReg(SrcReg);
1783 }
1784 }
1785 }
1786
1787 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1788 TII.get(PPC::BLR8));
1789
1790 for (Register Reg : RetRegs)
1792
1793 return true;
1794}
1795
1796// Attempt to emit an integer extend of SrcReg into DestReg. Both
1797// signed and zero extensions are supported. Return false if we
1798// can't handle it.
1799bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
1800 Register DestReg, bool IsZExt) {
1801 if (DestVT != MVT::i32 && DestVT != MVT::i64)
1802 return false;
1803 if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
1804 return false;
1805
1806 // Signed extensions use EXTSB, EXTSH, EXTSW.
1807 if (!IsZExt) {
1808 unsigned Opc;
1809 if (SrcVT == MVT::i8)
1810 Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
1811 else if (SrcVT == MVT::i16)
1812 Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
1813 else {
1814 assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
1815 Opc = PPC::EXTSW_32_64;
1816 }
1817 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
1818 .addReg(SrcReg);
1819
1820 // Unsigned 32-bit extensions use RLWINM.
1821 } else if (DestVT == MVT::i32) {
1822 unsigned MB;
1823 if (SrcVT == MVT::i8)
1824 MB = 24;
1825 else {
1826 assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
1827 MB = 16;
1828 }
1829 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::RLWINM),
1830 DestReg)
1831 .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
1832
1833 // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
1834 } else {
1835 unsigned MB;
1836 if (SrcVT == MVT::i8)
1837 MB = 56;
1838 else if (SrcVT == MVT::i16)
1839 MB = 48;
1840 else
1841 MB = 32;
1842 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1843 TII.get(PPC::RLDICL_32_64), DestReg)
1844 .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
1845 }
1846
1847 return true;
1848}
1849
1850// Attempt to fast-select an indirect branch instruction.
1851bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
1852 Register AddrReg = getRegForValue(I->getOperand(0));
1853 if (!AddrReg)
1854 return false;
1855
1856 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::MTCTR8))
1857 .addReg(AddrReg);
1858 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::BCTR8));
1859
1860 const IndirectBrInst *IB = cast<IndirectBrInst>(I);
1861 for (const BasicBlock *SuccBB : IB->successors())
1862 FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(SuccBB));
1863
1864 return true;
1865}
1866
1867// Attempt to fast-select an integer truncate instruction.
1868bool PPCFastISel::SelectTrunc(const Instruction *I) {
1869 Value *Src = I->getOperand(0);
1870 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
1871 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
1872
1873 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
1874 return false;
1875
1876 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
1877 return false;
1878
1879 Register SrcReg = getRegForValue(Src);
1880 if (!SrcReg)
1881 return false;
1882
1883 // The only interesting case is when we need to switch register classes.
1884 if (SrcVT == MVT::i64)
1885 SrcReg = copyRegToRegClass(&PPC::GPRCRegClass, SrcReg, 0, PPC::sub_32);
1886
1887 updateValueMap(I, SrcReg);
1888 return true;
1889}
1890
1891// Attempt to fast-select an integer extend instruction.
1892bool PPCFastISel::SelectIntExt(const Instruction *I) {
1893 Type *DestTy = I->getType();
1894 Value *Src = I->getOperand(0);
1895 Type *SrcTy = Src->getType();
1896
1897 bool IsZExt = isa<ZExtInst>(I);
1898 Register SrcReg = getRegForValue(Src);
1899 if (!SrcReg) return false;
1900
1901 EVT SrcEVT, DestEVT;
1902 SrcEVT = TLI.getValueType(DL, SrcTy, true);
1903 DestEVT = TLI.getValueType(DL, DestTy, true);
1904 if (!SrcEVT.isSimple())
1905 return false;
1906 if (!DestEVT.isSimple())
1907 return false;
1908
1909 MVT SrcVT = SrcEVT.getSimpleVT();
1910 MVT DestVT = DestEVT.getSimpleVT();
1911
1912 // If we know the register class needed for the result of this
1913 // instruction, use it. Otherwise pick the register class of the
1914 // correct size that does not contain X0/R0, since we don't know
1915 // whether downstream uses permit that assignment.
1916 Register AssignedReg = FuncInfo.ValueMap[I];
1917 const TargetRegisterClass *RC =
1918 (AssignedReg ? MRI.getRegClass(AssignedReg) :
1919 (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
1920 &PPC::GPRC_and_GPRC_NOR0RegClass));
1921 Register ResultReg = createResultReg(RC);
1922
1923 if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
1924 return false;
1925
1926 updateValueMap(I, ResultReg);
1927 return true;
1928}
1929
1930// Attempt to fast-select an instruction that wasn't handled by
1931// the table-generated machinery.
1932bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
1933
1934 switch (I->getOpcode()) {
1935 case Instruction::Load:
1936 return SelectLoad(I);
1937 case Instruction::Store:
1938 return SelectStore(I);
1939 case Instruction::Br:
1940 return SelectBranch(I);
1941 case Instruction::IndirectBr:
1942 return SelectIndirectBr(I);
1943 case Instruction::FPExt:
1944 return SelectFPExt(I);
1945 case Instruction::FPTrunc:
1946 return SelectFPTrunc(I);
1947 case Instruction::SIToFP:
1948 return SelectIToFP(I, /*IsSigned*/ true);
1949 case Instruction::UIToFP:
1950 return SelectIToFP(I, /*IsSigned*/ false);
1951 case Instruction::FPToSI:
1952 return SelectFPToI(I, /*IsSigned*/ true);
1953 case Instruction::FPToUI:
1954 return SelectFPToI(I, /*IsSigned*/ false);
1955 case Instruction::Add:
1956 return SelectBinaryIntOp(I, ISD::ADD);
1957 case Instruction::Or:
1958 return SelectBinaryIntOp(I, ISD::OR);
1959 case Instruction::Sub:
1960 return SelectBinaryIntOp(I, ISD::SUB);
1961 case Instruction::Ret:
1962 return SelectRet(I);
1963 case Instruction::Trunc:
1964 return SelectTrunc(I);
1965 case Instruction::ZExt:
1966 case Instruction::SExt:
1967 return SelectIntExt(I);
1968 // Here add other flavors of Instruction::XXX that automated
1969 // cases don't catch. For example, switches are terminators
1970 // that aren't yet handled.
1971 default:
1972 break;
1973 }
1974 return false;
1975}
1976
1977// Materialize a floating-point constant into a register, and return
1978// the register number (or zero if we failed to handle it).
1979Register PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
1980 // If this is a PC-Rel function, let SDISel handle constant pool.
1981 if (Subtarget->isUsingPCRelativeCalls())
1982 return Register();
1983
1984 // No plans to handle long double here.
1985 if (VT != MVT::f32 && VT != MVT::f64)
1986 return Register();
1987
1988 // All FP constants are loaded from the constant pool.
1989 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
1990 unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
1991 const bool HasSPE = Subtarget->hasSPE();
1992 const TargetRegisterClass *RC;
1993 if (HasSPE)
1994 RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
1995 else
1996 RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);
1997
1998 Register DestReg = createResultReg(RC);
1999 CodeModel::Model CModel = TM.getCodeModel();
2000
2001 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2003 MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Alignment);
2004
2005 unsigned Opc;
2006
2007 if (HasSPE)
2008 Opc = ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD);
2009 else
2010 Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD);
2011
2012 Register TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2013
2014 PPCFuncInfo->setUsesTOCBasePtr();
2015 // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
2016 if (CModel == CodeModel::Small) {
2017 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocCPT),
2018 TmpReg)
2019 .addConstantPoolIndex(Idx).addReg(PPC::X2);
2020 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
2021 .addImm(0).addReg(TmpReg).addMemOperand(MMO);
2022 } else {
2023 // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)).
2024 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
2025 TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
2026 // But for large code model, we must generate a LDtocL followed
2027 // by the LF[SD].
2028 if (CModel == CodeModel::Large) {
2029 Register TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2030 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
2031 TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
2032 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
2033 .addImm(0)
2034 .addReg(TmpReg2);
2035 } else
2036 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
2038 .addReg(TmpReg)
2039 .addMemOperand(MMO);
2040 }
2041
2042 return DestReg;
2043}
2044
// Materialize the address of a global value into a register, and return
// the register number (or zero if we failed to handle it).  The address
// is formed relative to the TOC base pointer (X2); the exact sequence
// depends on the code model and whether the symbol is accessed
// indirectly.
Register PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
  // If this is a PC-Rel function, let SDISel handle GV materialization.
  if (Subtarget->isUsingPCRelativeCalls())
    return Register();

  // Addresses are always 64-bit here (PPC64-only fast-isel).
  assert(VT == MVT::i64 && "Non-address!");
  const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
  Register DestReg = createResultReg(RC);

  // Global values may be plain old object addresses, TLS object
  // addresses, constant pool entries, or jump tables. How we generate
  // code for these may depend on small, medium, or large code model.
  CodeModel::Model CModel = TM.getCodeModel();

  // FIXME: Jump tables are not yet required because fast-isel doesn't
  // handle switches; if that changes, we need them as well. For now,
  // what follows assumes everything's a generic (or TLS) global address.

  // FIXME: We don't yet handle the complexity of TLS.
  if (GV->isThreadLocal())
    return Register();

  PPCFuncInfo->setUsesTOCBasePtr();
  // On AIX, a global variable carrying the "toc-data" attribute is
  // addressed with a direct add from X2 (see ADDItoc8 below) rather
  // than a load of a TOC entry.
  bool IsAIXTocData = TM.getTargetTriple().isOSAIX() &&
                      isa<GlobalVariable>(GV) &&
                      cast<GlobalVariable>(GV)->hasAttribute("toc-data");

  // For small code model, generate a simple TOC load.
  if (CModel == CodeModel::Small) {
    auto MIB = BuildMI(
        *FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
        IsAIXTocData ? TII.get(PPC::ADDItoc8) : TII.get(PPC::LDtoc), DestReg);
    if (IsAIXTocData)
      MIB.addReg(PPC::X2).addGlobalAddress(GV);
    else
      MIB.addGlobalAddress(GV).addReg(PPC::X2);
  } else {
    // If the address is an externally defined symbol, a symbol with common
    // or externally available linkage, a non-local function address, or a
    // jump table address (not yet needed), or if we are generating code
    // for large code model, we generate:
    //   LDtocL(GV, ADDIStocHA8(%x2, GV))
    // Otherwise we generate:
    //   ADDItocL8(ADDIStocHA8(%x2, GV), GV)
    // Either way, start with the ADDIStocHA8:
    Register HighPartReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
            HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);

    if (Subtarget->isGVIndirectSymbol(GV)) {
      assert(!IsAIXTocData && "TOC data should always be direct.");
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
              DestReg).addGlobalAddress(GV).addReg(HighPartReg);
    } else {
      // Otherwise generate the ADDItocL8.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDItocL8),
              DestReg)
          .addReg(HighPartReg)
          .addGlobalAddress(GV);
    }
  }

  return DestReg;
}
2111
2112// Materialize a 32-bit integer constant into a register, and return
2113// the register number (or zero if we failed to handle it).
2114Register PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
2115 const TargetRegisterClass *RC) {
2116 unsigned Lo = Imm & 0xFFFF;
2117 unsigned Hi = (Imm >> 16) & 0xFFFF;
2118
2119 Register ResultReg = createResultReg(RC);
2120 bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
2121
2122 if (isInt<16>(Imm))
2123 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2124 TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
2125 .addImm(Imm);
2126 else if (Lo) {
2127 // Both Lo and Hi have nonzero bits.
2128 Register TmpReg = createResultReg(RC);
2129 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2130 TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
2131 .addImm(Hi);
2132 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2133 TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
2134 .addReg(TmpReg).addImm(Lo);
2135 } else
2136 // Just Hi bits.
2137 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2138 TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
2139 .addImm(Hi);
2140
2141 return ResultReg;
2142}
2143
// Materialize a 64-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).  Builds a
// 32-bit value with PPCMaterialize32BitInt, then shifts it into place
// with RLDICR and ORs in any remaining low bits with ORIS8/ORI8.
Register PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
                                             const TargetRegisterClass *RC) {
  // Low 32 bits still to be ORed in (only nonzero when Shift == 32).
  unsigned Remainder = 0;
  // Left-shift to apply to the materialized 32-bit value.
  unsigned Shift = 0;

  // If the value doesn't fit in 32 bits, see if we can shift it
  // so that it fits in 32 bits.
  if (!isInt<32>(Imm)) {
    Shift = llvm::countr_zero<uint64_t>(Imm);
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

    if (isInt<32>(ImmSh))
      // The value is a 32-bit quantity followed by Shift zero bits.
      Imm = ImmSh;
    else {
      // Otherwise split into the high 32 bits (materialized and shifted
      // below) and the low 32 bits (ORed in 16 bits at a time).
      Remainder = Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }

  // Handle the high-order 32 bits (if shifted) or the whole 32 bits
  // (if not shifted).
  Register TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
  if (!Shift)
    return TmpReg1;

  // If upper 32 bits were not zero, we've built them and need to shift
  // them into place.
  Register TmpReg2;
  if (Imm) {
    TmpReg2 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::RLDICR),
            TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
  } else
    TmpReg2 = TmpReg1;

  // OR in the high 16 bits of the remainder, if any.
  Register TmpReg3;
  unsigned Hi, Lo;
  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
    TmpReg3 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ORIS8),
            TmpReg3).addReg(TmpReg2).addImm(Hi);
  } else
    TmpReg3 = TmpReg2;

  // OR in the low 16 bits of the remainder, if any.
  if ((Lo = Remainder & 0xFFFF)) {
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ORI8),
            ResultReg).addReg(TmpReg3).addImm(Lo);
    return ResultReg;
  }

  return TmpReg3;
}
2200
2201// Materialize an integer constant into a register, and return
2202// the register number (or zero if we failed to handle it).
2203Register PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
2204 bool UseSExt) {
2205 // If we're using CR bit registers for i1 values, handle that as a special
2206 // case first.
2207 if (VT == MVT::i1 && Subtarget->useCRBits()) {
2208 Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2209 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2210 TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2211 return ImmReg;
2212 }
2213
2214 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2215 VT != MVT::i1)
2216 return Register();
2217
2218 const TargetRegisterClass *RC =
2219 ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
2220 int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();
2221
2222 // If the constant is in range, use a load-immediate.
2223 // Since LI will sign extend the constant we need to make sure that for
2224 // our zeroext constants that the sign extended constant fits into 16-bits -
2225 // a range of 0..0x7fff.
2226 if (isInt<16>(Imm)) {
2227 unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
2228 Register ImmReg = createResultReg(RC);
2229 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ImmReg)
2230 .addImm(Imm);
2231 return ImmReg;
2232 }
2233
2234 // Construct the constant piecewise.
2235 if (VT == MVT::i64)
2236 return PPCMaterialize64BitInt(Imm, RC);
2237 else if (VT == MVT::i32)
2238 return PPCMaterialize32BitInt(Imm, RC);
2239
2240 return Register();
2241}
2242
2243// Materialize a constant into a register, and return the register
2244// number (or zero if we failed to handle it).
2245Register PPCFastISel::fastMaterializeConstant(const Constant *C) {
2246 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
2247
2248 // Only handle simple types.
2249 if (!CEVT.isSimple())
2250 return Register();
2251 MVT VT = CEVT.getSimpleVT();
2252
2253 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
2254 return PPCMaterializeFP(CFP, VT);
2255 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
2256 return PPCMaterializeGV(GV, VT);
2257 else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
2258 // Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
2259 // assumes that constant PHI operands will be zero extended, and failure to
2260 // match that assumption will cause problems if we sign extend here but
2261 // some user of a PHI is in a block for which we fall back to full SDAG
2262 // instruction selection.
2263 return PPCMaterializeInt(CI, VT, false);
2264
2265 return Register();
2266}
2267
2268// Materialize the address created by an alloca into a register, and
2269// return the register number (or zero if we failed to handle it).
2270Register PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
2271 DenseMap<const AllocaInst *, int>::iterator SI =
2272 FuncInfo.StaticAllocaMap.find(AI);
2273
2274 // Don't handle dynamic allocas.
2275 if (SI == FuncInfo.StaticAllocaMap.end())
2276 return Register();
2277
2278 MVT VT;
2279 if (!isLoadTypeLegal(AI->getType(), VT))
2280 return Register();
2281
2282 if (SI != FuncInfo.StaticAllocaMap.end()) {
2283 Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2284 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDI8),
2285 ResultReg).addFrameIndex(SI->second).addImm(0);
2286 return ResultReg;
2287 }
2288
2289 return Register();
2290}
2291
2292// Fold loads into extends when possible.
2293// FIXME: We can have multiple redundant extend/trunc instructions
2294// following a load. The folding only picks up one. Extend this
2295// to check subsequent instructions for the same pattern and remove
2296// them. Thus ResultReg should be the def reg for the last redundant
2297// instruction in a chain, and all intervening instructions can be
2298// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
2299// to add ELF64-NOT: rldicl to the appropriate tests when this works.
2300bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
2301 const LoadInst *LI) {
2302 // Verify we have a legal type before going any further.
2303 MVT VT;
2304 if (!isLoadTypeLegal(LI->getType(), VT))
2305 return false;
2306
2307 // Combine load followed by zero- or sign-extend.
2308 bool IsZExt = false;
2309 switch(MI->getOpcode()) {
2310 default:
2311 return false;
2312
2313 case PPC::RLDICL:
2314 case PPC::RLDICL_32_64: {
2315 IsZExt = true;
2316 unsigned MB = MI->getOperand(3).getImm();
2317 if ((VT == MVT::i8 && MB <= 56) ||
2318 (VT == MVT::i16 && MB <= 48) ||
2319 (VT == MVT::i32 && MB <= 32))
2320 break;
2321 return false;
2322 }
2323
2324 case PPC::RLWINM:
2325 case PPC::RLWINM8: {
2326 IsZExt = true;
2327 unsigned MB = MI->getOperand(3).getImm();
2328 if ((VT == MVT::i8 && MB <= 24) ||
2329 (VT == MVT::i16 && MB <= 16))
2330 break;
2331 return false;
2332 }
2333
2334 case PPC::EXTSB:
2335 case PPC::EXTSB8:
2336 case PPC::EXTSB8_32_64:
2337 /* There is no sign-extending load-byte instruction. */
2338 return false;
2339
2340 case PPC::EXTSH:
2341 case PPC::EXTSH8:
2342 case PPC::EXTSH8_32_64: {
2343 if (VT != MVT::i16 && VT != MVT::i8)
2344 return false;
2345 break;
2346 }
2347
2348 case PPC::EXTSW:
2349 case PPC::EXTSW_32:
2350 case PPC::EXTSW_32_64: {
2351 if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
2352 return false;
2353 break;
2354 }
2355 }
2356
2357 // See if we can handle this address.
2358 Address Addr;
2359 if (!PPCComputeAddress(LI->getOperand(0), Addr))
2360 return false;
2361
2362 Register ResultReg = MI->getOperand(0).getReg();
2363
2364 if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt,
2365 Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
2366 return false;
2367
2369 removeDeadCode(I, std::next(I));
2370 return true;
2371}
2372
// Attempt to lower call arguments in a faster way than done by
// the selection DAG code.  Always returns false, so the generic
// lowering handles all arguments.
bool PPCFastISel::fastLowerArguments() {
  // Defer to normal argument lowering for now. It's reasonably
  // efficient. Consider doing something like ARM to handle the
  // case where all args fit in registers, no varargs, no float
  // or vector args.
  return false;
}
2382
2383// Handle materializing integer constants into a register. This is not
2384// automatically generated for PowerPC, so must be explicitly created here.
2385Register PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
2386
2387 if (Opc != ISD::Constant)
2388 return Register();
2389
2390 // If we're using CR bit registers for i1 values, handle that as a special
2391 // case first.
2392 if (VT == MVT::i1 && Subtarget->useCRBits()) {
2393 Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2394 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2395 TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2396 return ImmReg;
2397 }
2398
2399 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2400 VT != MVT::i1)
2401 return Register();
2402
2403 const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
2404 &PPC::GPRCRegClass);
2405 if (VT == MVT::i64)
2406 return PPCMaterialize64BitInt(Imm, RC);
2407 else
2408 return PPCMaterialize32BitInt(Imm, RC);
2409}
2410
2411// Override for ADDI and ADDI8 to set the correct register class
2412// on RHS operand 0. The automatic infrastructure naively assumes
2413// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
2414// for these cases. At the moment, none of the other automatically
2415// generated RI instructions require special treatment. However, once
2416// SelectSelect is implemented, "isel" requires similar handling.
2417//
2418// Also be conservative about the output register class. Avoid
2419// assigning R0 or X0 to the output register for GPRC and G8RC
2420// register classes, as any such result could be used in ADDI, etc.,
2421// where those regs have another meaning.
2422Register PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
2423 const TargetRegisterClass *RC,
2424 Register Op0, uint64_t Imm) {
2425 if (MachineInstOpcode == PPC::ADDI)
2426 MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
2427 else if (MachineInstOpcode == PPC::ADDI8)
2428 MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
2429
2430 const TargetRegisterClass *UseRC =
2431 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2432 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2433
2434 return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC, Op0, Imm);
2435}
2436
2437// Override for instructions with one register operand to avoid use of
2438// R0/X0. The automatic infrastructure isn't aware of the context so
2439// we must be conservative.
2440Register PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
2441 const TargetRegisterClass *RC,
2442 Register Op0) {
2443 const TargetRegisterClass *UseRC =
2444 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2445 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2446
2447 return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0);
2448}
2449
2450// Override for instructions with two register operands to avoid use
2451// of R0/X0. The automatic infrastructure isn't aware of the context
2452// so we must be conservative.
2453Register PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
2454 const TargetRegisterClass *RC,
2455 Register Op0, Register Op1) {
2456 const TargetRegisterClass *UseRC =
2457 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2458 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2459
2460 return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op1);
2461}
2462
2463namespace llvm {
2464 // Create the fast instruction selector for PowerPC64 ELF.
2466 const TargetLibraryInfo *LibInfo) {
2467 // Only available on 64-bit for now.
2468 const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
2469 if (Subtarget.isPPC64())
2470 return new PPCFastISel(FuncInfo, LibInfo);
2471 return nullptr;
2472 }
2473}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file defines the FastISel class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
uint64_t IntrinsicInst * II
static std::optional< PPC::Predicate > getComparePred(CmpInst::Predicate Pred)
static constexpr MCPhysReg FPReg
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
BaseType
A given derived pointer can have multiple base pointers through phi/selects.
This file describes how to lower LLVM code to machine code.
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
an instruction to allocate memory on the stack
PointerType * getType() const
Overload to return most specific pointer type.
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
Register getLocReg() const
LocInfo getLocInfo() const
unsigned getValNo() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition InstrTypes.h:693
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:678
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:131
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:169
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
This is an important base class in LLVM.
Definition Constant.h:43
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
Register fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, Register Op0, uint64_t Imm)
Emit a MachineInstr with a register operand, an immediate, and a result register in the given registe...
Register fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, Register Op0, Register Op1)
Emit a MachineInstr with two register operands and a result register in the given register class.
Register fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, Register Op0)
Emit a MachineInstr with one register operand and a result register in the given register class.
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
MachineBasicBlock::iterator InsertPt
MBB - The current insert position inside the current block.
MachineBasicBlock * MBB
MBB - The current block.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
Machine Value Type.
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MachineInstrBundleIterator< MachineInstr > iterator
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
const PPCFrameLowering * getFrameLowering() const override
bool isUsingPCRelativeCalls() const
const PPCTargetLowering * getTargetLowering() const override
const PPCInstrInfo * getInstrInfo() const override
bool isLittleEndian() const
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
void push_back(const T &Elt)
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:743
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned getID() const
Return the register class ID number.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
bool isOSAIX() const
Tests whether the OS is AIX.
Definition Triple.h:776
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
const Use * const_op_iterator
Definition User.h:280
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ MO_TOC_LO
Definition PPC.h:185
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags, and optionally also the offsets, if the return value is being lowered to memory.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool RetCC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most, stopping at the first 1.
Definition bit.h:202
bool CC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
generic_gep_type_iterator<> gep_type_iterator
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.