//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-call-lowering"

using namespace llvm;

namespace {

/// Wrapper around extendRegister to ensure we extend to a full 32-bit register.
static Register extendRegisterMin32(CallLowering::ValueHandler &Handler,
                                    Register ValVReg, CCValAssign &VA) {
  if (VA.getLocVT().getSizeInBits() < 32) {
    // 16-bit types are reported as legal for 32-bit registers. We need to
    // extend and do a 32-bit copy to avoid the verifier complaining about it.
    return Handler.MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
  }

  return Handler.extendRegister(ValVReg, VA);
}

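// The value handlers below bridge GlobalISel's generic argument assignment to
// the AMDGPU calling conventions: the outgoing handlers feed values into a
// return or call instruction, while the incoming handlers materialize formal
// arguments and call results.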
struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
  AMDGPUOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                             MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : OutgoingValueHandler(B, MRI, AssignFn), MIB(MIB) {}

  MachineInstrBuilder MIB;

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    llvm_unreachable("not implemented");
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);

    // If this is a scalar return, insert a readfirstlane just in case the value
    // ends up in a VGPR.
    // FIXME: Assert this is a shader return.
    const SIRegisterInfo *TRI
      = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    if (TRI->isSGPRReg(MRI, PhysReg)) {
      auto ToSGPR = MIRBuilder.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
                                              {MRI.getType(ExtReg)}, false)
                        .addReg(ExtReg);
      ExtReg = ToSGPR.getReg(0);
    }

    MIRBuilder.buildCopy(PhysReg, ExtReg);
    MIB.addUse(PhysReg, RegState::Implicit);
  }

  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info,
                 ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
  }
};

struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
  uint64_t StackUsed = 0;

  AMDGPUIncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                           CCAssignFn *AssignFn)
      : IncomingValueHandler(B, MRI, AssignFn) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();

    // Byval is assumed to be writable memory, but other stack passed arguments
    // are not.
    const bool IsImmutable = !Flags.isByVal();
    int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    auto AddrReg = MIRBuilder.buildFrameIndex(
        LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32), FI);
    StackUsed = std::max(StackUsed, Size + Offset);
    return AddrReg.getReg(0);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    markPhysRegUsed(PhysReg);

    if (VA.getLocVT().getSizeInBits() < 32) {
      // 16-bit types are reported as legal for 32-bit registers. We need to do
      // a 32-bit copy, and truncate to avoid the verifier complaining about it.
      auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);

      // If we have signext/zeroext, it applies to the whole 32-bit register
      // before truncation.
      auto Extended =
          buildExtensionHint(VA, Copy.getReg(0), LLT(VA.getLocVT()));
      MIRBuilder.buildTrunc(ValVReg, Extended);
      return;
    }

    IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t MemSize,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();

    // The reported memory location may be wider than the value.
    const LLT RegTy = MRI.getType(ValVReg);
    MemSize = std::min(static_cast<uint64_t>(RegTy.getSizeInBytes()), MemSize);

    // FIXME: Get alignment
    auto MMO = MF.getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemSize,
        inferAlignFromPtrInfo(MF, MPO));
    MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the call).
  virtual void markPhysRegUsed(unsigned PhysReg) = 0;
};

struct FormalArgHandler : public AMDGPUIncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                   CCAssignFn *AssignFn)
      : AMDGPUIncomingArgHandler(B, MRI, AssignFn) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};

struct CallReturnHandler : public AMDGPUIncomingArgHandler {
  CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                    MachineInstrBuilder MIB, CCAssignFn *AssignFn)
      : AMDGPUIncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}

  void markPhysRegUsed(unsigned PhysReg) override {
    MIB.addDef(PhysReg, RegState::Implicit);
  }

  MachineInstrBuilder MIB;
};

struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
  CCAssignFn *AssignFnVarArg;

  /// For tail calls, the byte offset of the call's argument area from the
  /// callee's. Unused elsewhere.
  int FPDiff;

  // Cache the SP register vreg if we need it more than once in this call site.
  Register SPReg;

  bool IsTailCall;

  AMDGPUOutgoingArgHandler(MachineIRBuilder &MIRBuilder,
                           MachineRegisterInfo &MRI, MachineInstrBuilder MIB,
                           CCAssignFn *AssignFn, CCAssignFn *AssignFnVarArg,
                           bool IsTailCall = false, int FPDiff = 0)
      : AMDGPUOutgoingValueHandler(MIRBuilder, MRI, MIB, AssignFn),
        AssignFnVarArg(AssignFnVarArg), FPDiff(FPDiff), IsTailCall(IsTailCall) {
  }

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    MachineFunction &MF = MIRBuilder.getMF();
    const LLT PtrTy = LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32);
    const LLT S32 = LLT::scalar(32);

    if (IsTailCall) {
      llvm_unreachable("implement me");
    }

    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

    if (!SPReg)
      SPReg = MIRBuilder.buildCopy(PtrTy, MFI->getStackPtrOffsetReg()).getReg(0);

    auto OffsetReg = MIRBuilder.buildConstant(S32, Offset);

    auto AddrReg = MIRBuilder.buildPtrAdd(PtrTy, SPReg, OffsetReg);
    MPO = MachinePointerInfo::getStack(MF, Offset);
    return AddrReg.getReg(0);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign &VA) override {
    MIB.addUse(PhysReg, RegState::Implicit);
    Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
    MIRBuilder.buildCopy(PhysReg, ExtReg);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();
    uint64_t LocMemOffset = VA.getLocMemOffset();
    const auto &ST = MF.getSubtarget<GCNSubtarget>();

    auto MMO = MF.getMachineMemOperand(
        MPO, MachineMemOperand::MOStore, Size,
        commonAlignment(ST.getStackAlignment(), LocMemOffset));
    MIRBuilder.buildStore(ValVReg, Addr, *MMO);
  }

  void assignValueToAddress(const CallLowering::ArgInfo &Arg,
                            unsigned ValRegIndex, Register Addr,
                            uint64_t MemSize, MachinePointerInfo &MPO,
                            CCValAssign &VA) override {
    Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
                           ? extendRegister(Arg.Regs[ValRegIndex], VA)
                           : Arg.Regs[ValRegIndex];

    // If we extended the value type we might need to adjust the MMO's
    // Size. This happens if ComputeValueVTs widened a small type value to a
    // legal register type (e.g. s8->s16)
    const LLT RegTy = MRI.getType(ValVReg);
    MemSize = std::min(MemSize, (uint64_t)RegTy.getSizeInBytes());
    assignValueToAddress(ValVReg, Addr, MemSize, MPO, VA);
  }
};
} // end anonymous namespace

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
    : CallLowering(&TLI) {
}

// FIXME: Compatibility shim
static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
  switch (MIOpc) {
  case TargetOpcode::G_SEXT:
    return ISD::SIGN_EXTEND;
  case TargetOpcode::G_ZEXT:
    return ISD::ZERO_EXTEND;
  case TargetOpcode::G_ANYEXT:
    return ISD::ANY_EXTEND;
  default:
    llvm_unreachable("not an extend opcode");
  }
}

bool AMDGPUCallLowering::canLowerReturn(MachineFunction &MF,
                                        CallingConv::ID CallConv,
                                        SmallVectorImpl<BaseArgInfo> &Outs,
                                        bool IsVarArg) const {
  // For shaders. Vector types should be explicitly handled by CC.
  if (AMDGPU::isEntryFunctionCC(CallConv))
    return true;

  SmallVector<CCValAssign, 16> ArgLocs;
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
                 MF.getFunction().getContext());

  return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv, IsVarArg));
}

/// Lower the return value for the already existing \p Ret. This assumes that
/// \p B's insertion point is correct.
bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
                                        const Value *Val, ArrayRef<Register> VRegs,
                                        MachineInstrBuilder &Ret) const {
  if (!Val)
    return true;

  auto &MF = B.getMF();
  const auto &F = MF.getFunction();
  const DataLayout &DL = MF.getDataLayout();
  MachineRegisterInfo *MRI = B.getMRI();
  LLVMContext &Ctx = F.getContext();

  CallingConv::ID CC = F.getCallingConv();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();

  SmallVector<EVT, 8> SplitEVTs;
  ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
  assert(VRegs.size() == SplitEVTs.size() &&
         "For each split Type there should be exactly one VReg.");

  SmallVector<ArgInfo, 8> SplitRetInfos;

  for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
    EVT VT = SplitEVTs[i];
    Register Reg = VRegs[i];
    ArgInfo RetInfo(Reg, VT.getTypeForEVT(Ctx));
    setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);

    if (VT.isScalarInteger()) {
      unsigned ExtendOp = TargetOpcode::G_ANYEXT;
      if (RetInfo.Flags[0].isSExt()) {
        assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
        ExtendOp = TargetOpcode::G_SEXT;
      } else if (RetInfo.Flags[0].isZExt()) {
        assert(RetInfo.Regs.size() == 1 && "expect only simple return values");
        ExtendOp = TargetOpcode::G_ZEXT;
      }

      EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
                                          extOpcodeToISDExtOpcode(ExtendOp));
      if (ExtVT != VT) {
        RetInfo.Ty = ExtVT.getTypeForEVT(Ctx);
        LLT ExtTy = getLLTForType(*RetInfo.Ty, DL);
        Reg = B.buildInstr(ExtendOp, {ExtTy}, {Reg}).getReg(0);
      }
    }

    if (Reg != RetInfo.Regs[0]) {
      RetInfo.Regs[0] = Reg;
      // Reset the arg flags after modifying Reg.
      setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
    }

    splitToValueTypes(RetInfo, SplitRetInfos, DL, CC);
  }

  CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
  AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret, AssignFn);
  return handleAssignments(B, SplitRetInfos, RetHandler, CC, F.isVarArg());
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
                                     ArrayRef<Register> VRegs,
                                     FunctionLoweringInfo &FLI) const {

  MachineFunction &MF = B.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MFI->setIfReturnsVoid(!Val);

  assert(!Val == VRegs.empty() && "Return value without a vreg");

  CallingConv::ID CC = B.getMF().getFunction().getCallingConv();
  const bool IsShader = AMDGPU::isShader(CC);
  const bool IsWaveEnd =
      (IsShader && MFI->returnsVoid()) || AMDGPU::isKernel(CC);
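  // Kernels and void-returning shaders have nothing to pass back; the wave
  // simply terminates with S_ENDPGM.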
  if (IsWaveEnd) {
    B.buildInstr(AMDGPU::S_ENDPGM)
      .addImm(0);
    return true;
  }

  auto const &ST = MF.getSubtarget<GCNSubtarget>();

  unsigned ReturnOpc =
      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;

  auto Ret = B.buildInstrNoInsert(ReturnOpc);
  Register ReturnAddrVReg;
  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
    Ret.addUse(ReturnAddrVReg);
  }

  if (!FLI.CanLowerReturn)
    insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister);
  else if (!lowerReturnVal(B, Val, VRegs, Ret))
    return false;

  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
    const SIRegisterInfo *TRI = ST.getRegisterInfo();
    Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
                                         &AMDGPU::SGPR_64RegClass);
    B.buildCopy(ReturnAddrVReg, LiveInReturn);
  }

  // TODO: Handle CalleeSavedRegsViaCopy.

  B.insertInstr(Ret);
  return true;
}

void AMDGPUCallLowering::lowerParameterPtr(Register DstReg, MachineIRBuilder &B,
                                           Type *ParamTy,
                                           uint64_t Offset) const {
  MachineFunction &MF = B.getMF();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register KernArgSegmentPtr =
      Info->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  auto OffsetReg = B.buildConstant(LLT::scalar(64), Offset);

  B.buildPtrAdd(DstReg, KernArgSegmentVReg, OffsetReg);
}

void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, Type *ParamTy,
                                        uint64_t Offset, Align Alignment,
                                        Register DstReg) const {
  MachineFunction &MF = B.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);

  LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
  Register PtrReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
  lowerParameterPtr(PtrReg, B, ParamTy, Offset);

  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo,
      MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
          MachineMemOperand::MOInvariant,
      TypeSize, Alignment);

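  // The kernarg segment is read-only for the lifetime of the dispatch, so the
  // load built here is dereferenceable and invariant and may be freely
  // reordered or rematerialized by later passes.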
  B.buildLoad(DstReg, PtrReg, *MMO);
}

// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
                                 MachineIRBuilder &B,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) {
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info.hasPrivateSegmentBuffer()) {
    Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info.hasDispatchPtr()) {
    Register DispatchPtrReg = Info.addDispatchPtr(TRI);
    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info.hasQueuePtr()) {
    Register QueuePtrReg = Info.addQueuePtr(TRI);
    MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info.hasKernargSegmentPtr()) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
    const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    Register VReg = MRI.createGenericVirtualRegister(P4);
    MRI.addLiveIn(InputPtrReg, VReg);
    B.getMBB().addLiveIn(InputPtrReg);
    B.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info.hasDispatchID()) {
    Register DispatchIDReg = Info.addDispatchID(TRI);
    MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info.hasFlatScratchInit()) {
    Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
  // these from the dispatch pointer.
}

bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
    MachineIRBuilder &B, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs) const {
  MachineFunction &MF = B.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  const DataLayout &DL = F.getParent()->getDataLayout();

  Info->allocateModuleLDSGlobal(F.getParent());

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  allocateHSAUserSGPRs(CCInfo, B, MF, *TRI, *Info);

  unsigned i = 0;
  const Align KernArgBaseAlign(16);
  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
  uint64_t ExplicitArgOffset = 0;

  // TODO: Align down to dword alignment and extract bits for extending loads.
  for (auto &Arg : F.args()) {
    const bool IsByRef = Arg.hasByRefAttr();
    Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
    if (AllocSize == 0)
      continue;

    MaybeAlign ABIAlign = IsByRef ? Arg.getParamAlign() : None;
    if (!ABIAlign)
      ABIAlign = DL.getABITypeAlign(ArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
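    // Worked example (assuming BaseOffset is 0): for kernel args (i8, i32),
    // the i8 lands at offset 0 and the i32 at alignTo(1, 4) == 4, leaving
    // ExplicitArgOffset == 8 afterwards.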

    if (Arg.use_empty()) {
      ++i;
      continue;
    }

    Align Alignment = commonAlignment(KernArgBaseAlign, ArgOffset);

    if (IsByRef) {
      unsigned ByRefAS = cast<PointerType>(Arg.getType())->getAddressSpace();

      assert(VRegs[i].size() == 1 &&
             "expected only one register for byval pointers");
      if (ByRefAS == AMDGPUAS::CONSTANT_ADDRESS) {
        lowerParameterPtr(VRegs[i][0], B, ArgTy, ArgOffset);
      } else {
        const LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
        Register PtrReg = MRI.createGenericVirtualRegister(ConstPtrTy);
        lowerParameterPtr(PtrReg, B, ArgTy, ArgOffset);

        B.buildAddrSpaceCast(VRegs[i][0], PtrReg);
      }
    } else {
      ArrayRef<Register> OrigArgRegs = VRegs[i];
      Register ArgReg =
          OrigArgRegs.size() == 1
              ? OrigArgRegs[0]
              : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));

      lowerParameter(B, ArgTy, ArgOffset, Alignment, ArgReg);
      if (OrigArgRegs.size() > 1)
        unpackRegs(OrigArgRegs, ArgReg, ArgTy, B);
    }

    ++i;
  }

  TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
  TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
  return true;
}

bool AMDGPUCallLowering::lowerFormalArguments(
    MachineIRBuilder &B, const Function &F, ArrayRef<ArrayRef<Register>> VRegs,
    FunctionLoweringInfo &FLI) const {
  CallingConv::ID CC = F.getCallingConv();

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (CC == CallingConv::AMDGPU_KERNEL)
    return lowerFormalArgumentsKernel(B, F, VRegs);

  const bool IsGraphics = AMDGPU::isGraphics(CC);
  const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);

  MachineFunction &MF = B.getMF();
  MachineBasicBlock &MBB = B.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  Info->allocateModuleLDSGlobal(F.getParent());

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

  if (!IsEntryFunc) {
    Register ReturnAddrReg = TRI->getReturnAddressReg(MF);
    Register LiveInReturn = MF.addLiveIn(ReturnAddrReg,
                                         &AMDGPU::SGPR_64RegClass);
    MBB.addLiveIn(ReturnAddrReg);
    B.buildCopy(LiveInReturn, ReturnAddrReg);
  }

  if (Info->hasImplicitBufferPtr()) {
    Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
    MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
    CCInfo.AllocateReg(ImplicitBufferPtrReg);
  }

  SmallVector<ArgInfo, 32> SplitArgs;
  unsigned Idx = 0;
  unsigned PSInputNum = 0;

  // Insert the hidden sret parameter if the return value won't fit in the
  // return registers.
  if (!FLI.CanLowerReturn)
    insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL);

  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()) == 0)
      continue;

    const bool InReg = Arg.hasAttribute(Attribute::InReg);

    // SGPR arguments to functions not implemented.
    if (!IsGraphics && InReg)
      return false;

    if (Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::Nest))
      return false;

    if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
      const bool ArgUsed = !Arg.use_empty();
      bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);

      if (!SkipArg) {
        Info->markPSInputAllocated(PSInputNum);
        if (ArgUsed)
          Info->markPSInputEnabled(PSInputNum);
      }

      ++PSInputNum;

      if (SkipArg) {
        for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
          B.buildUndef(VRegs[Idx][I]);

        ++Idx;
        continue;
      }
    }

    ArgInfo OrigArg(VRegs[Idx], Arg);
    const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
    setArgFlags(OrigArg, OrigArgIdx, DL, F);

    splitToValueTypes(OrigArg, SplitArgs, DL, CC);
    ++Idx;
  }

  // At least one interpolation mode must be enabled or else the GPU will
  // hang.
  //
  // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
  // set PSInputAddr, the user wants to enable some bits after the compilation
  // based on run-time states. Since we can't know what the final PSInputEna
  // will look like, we shouldn't do anything here and the user should take
  // responsibility for the correct programming.
  //
  // Otherwise, the following restrictions apply:
  // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
  // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
  //   enabled too.
  if (CC == CallingConv::AMDGPU_PS) {
    if ((Info->getPSInputAddr() & 0x7F) == 0 ||
        ((Info->getPSInputAddr() & 0xF) == 0 &&
         Info->isPSInputAllocated(11))) {
      CCInfo.AllocateReg(AMDGPU::VGPR0);
      CCInfo.AllocateReg(AMDGPU::VGPR1);
      Info->markPSInputAllocated(0);
      Info->markPSInputEnabled(0);
    }

    if (Subtarget.isAmdPalOS()) {
      // For isAmdPalOS, the user does not enable some bits after compilation
      // based on run-time states; the register values being generated here are
      // the final ones set in hardware. Therefore we need to apply the
      // workaround to PSInputAddr and PSInputEnable together. (The case where
      // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
      // set up an input arg for a particular interpolation mode, but nothing
      // uses that input arg. Really we should have an earlier pass that removes
      // such an arg.)
      unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
      if ((PsInputBits & 0x7F) == 0 ||
          ((PsInputBits & 0xF) == 0 &&
           (PsInputBits >> 11 & 1)))
        Info->markPSInputEnabled(
            countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
    }
  }

  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());

  if (!MBB.empty())
    B.setInstr(*MBB.begin());

  if (!IsEntryFunc) {
    // For the fixed ABI, pass workitem IDs in the last argument register.
    if (AMDGPUTargetMachine::EnableFixedFunctionABI)
      TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
  }

  FormalArgHandler Handler(B, MRI, AssignFn);
  if (!handleAssignments(CCInfo, ArgLocs, B, SplitArgs, Handler))
    return false;

  if (!IsEntryFunc && !AMDGPUTargetMachine::EnableFixedFunctionABI) {
    // Special inputs come after user arguments.
    TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Start adding system SGPRs.
  if (IsEntryFunc) {
    TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsGraphics);
  } else {
    if (!Subtarget.enableFlatScratch())
      CCInfo.AllocateReg(Info->getScratchRSrcReg());
    TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
  }

  // Move back to the end of the basic block.
  B.setMBB(MBB);

  return true;
}

bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
                                           CCState &CCInfo,
                                           SmallVectorImpl<std::pair<MCRegister, Register>> &ArgRegs,
                                           CallLoweringInfo &Info) const {
  MachineFunction &MF = MIRBuilder.getMF();

  const AMDGPUFunctionArgInfo *CalleeArgInfo
      = &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const AMDGPUFunctionArgInfo &CallerArgInfo = MFI->getArgInfo();

  // TODO: Unify with private memory register handling. This is complicated by
  // the fact that at least in kernels, the input argument is not necessarily
  // in the same location as the input.
  AMDGPUFunctionArgInfo::PreloadedValue InputRegs[] = {
    AMDGPUFunctionArgInfo::DISPATCH_PTR,
    AMDGPUFunctionArgInfo::QUEUE_PTR,
    AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR,
    AMDGPUFunctionArgInfo::DISPATCH_ID,
    AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
    AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
    AMDGPUFunctionArgInfo::WORKGROUP_ID_Z
  };

  MachineRegisterInfo &MRI = MF.getRegInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI
      = static_cast<const AMDGPULegalizerInfo*>(ST.getLegalizerInfo());

  for (auto InputID : InputRegs) {
    const ArgDescriptor *OutgoingArg;
    const TargetRegisterClass *ArgRC;
    LLT ArgTy;

    std::tie(OutgoingArg, ArgRC, ArgTy) =
        CalleeArgInfo->getPreloadedValue(InputID);
    if (!OutgoingArg)
      continue;

    const ArgDescriptor *IncomingArg;
    const TargetRegisterClass *IncomingArgRC;
    std::tie(IncomingArg, IncomingArgRC, ArgTy) =
        CallerArgInfo.getPreloadedValue(InputID);
    assert(IncomingArgRC == ArgRC);

    Register InputReg = MRI.createGenericVirtualRegister(ArgTy);

    if (IncomingArg) {
      LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);
    } else {
      assert(InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
      LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder);
    }

    if (OutgoingArg->isRegister()) {
      ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
      if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
        report_fatal_error("failed to allocate implicit input argument");
    } else {
      LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
      return false;
    }
  }

  // Pack workitem IDs into a single register or pass it as is if already
  // packed.
  const ArgDescriptor *OutgoingArg;
  const TargetRegisterClass *ArgRC;
  LLT ArgTy;

  std::tie(OutgoingArg, ArgRC, ArgTy) =
      CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
  if (!OutgoingArg)
    std::tie(OutgoingArg, ArgRC, ArgTy) =
        CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
  if (!OutgoingArg)
    std::tie(OutgoingArg, ArgRC, ArgTy) =
        CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
  if (!OutgoingArg)
    return false;

  auto WorkitemIDX =
      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
  auto WorkitemIDY =
      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
  auto WorkitemIDZ =
      CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);

  const ArgDescriptor *IncomingArgX = std::get<0>(WorkitemIDX);
  const ArgDescriptor *IncomingArgY = std::get<0>(WorkitemIDY);
  const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);
  const LLT S32 = LLT::scalar(32);

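  // The packed layout built below places X in bits [9:0], Y in bits [19:10]
  // and Z in bits [29:20] of one 32-bit register, assuming each ID fits in
  // 10 bits.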
  // If incoming ids are not packed we need to pack them.
  // FIXME: Should consider known workgroup size to eliminate known 0 cases.
  Register InputReg;
  if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX) {
    InputReg = MRI.createGenericVirtualRegister(S32);
    LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
                       std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
  }

  if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY) {
    Register Y = MRI.createGenericVirtualRegister(S32);
    LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
                       std::get<2>(WorkitemIDY));

    Y = MIRBuilder.buildShl(S32, Y, MIRBuilder.buildConstant(S32, 10)).getReg(0);
    InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
  }

  if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ) {
    Register Z = MRI.createGenericVirtualRegister(S32);
    LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
                       std::get<2>(WorkitemIDZ));

    Z = MIRBuilder.buildShl(S32, Z, MIRBuilder.buildConstant(S32, 20)).getReg(0);
    InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
  }

  if (!InputReg) {
    InputReg = MRI.createGenericVirtualRegister(S32);

    // Workitem ids are already packed, any of present incoming arguments will
    // carry all required fields.
    ArgDescriptor IncomingArg = ArgDescriptor::createArg(
        IncomingArgX ? *IncomingArgX :
        IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);
    LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,
                       &AMDGPU::VGPR_32RegClass, S32);
  }

  if (OutgoingArg->isRegister()) {
    ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
    if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
      report_fatal_error("failed to allocate implicit input argument");
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled stack passed implicit input argument\n");
    return false;
  }

  return true;
}

/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
/// CC.
static std::pair<CCAssignFn *, CCAssignFn *>
getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI) {
  return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
}

static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
                              bool IsTailCall) {
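  // Tail calls are not implemented yet, so every call currently lowers to the
  // SI_CALL pseudo regardless of IsIndirect or IsTailCall.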
  return AMDGPU::SI_CALL;
}

// Add operands to call instruction to track the callee.
static bool addCallTargetOperands(MachineInstrBuilder &CallInst,
                                  MachineIRBuilder &MIRBuilder,
                                  AMDGPUCallLowering::CallLoweringInfo &Info) {
  if (Info.Callee.isReg()) {
    CallInst.addReg(Info.Callee.getReg());
    CallInst.addImm(0);
  } else if (Info.Callee.isGlobal() && Info.Callee.getOffset() == 0) {
    // The call lowering lightly assumed we can directly encode a call target in
    // the instruction, which is not the case. Materialize the address here.
    const GlobalValue *GV = Info.Callee.getGlobal();
    auto Ptr = MIRBuilder.buildGlobalValue(
        LLT::pointer(GV->getAddressSpace(), 64), GV);
    CallInst.addReg(Ptr.getReg(0));
    CallInst.add(Info.Callee);
  } else
    return false;

  return true;
}

bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                   CallLoweringInfo &Info) const {
  if (Info.IsVarArg) {
    LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n");
    return false;
  }

  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SITargetLowering &TLI = *getTLI<SITargetLowering>();
  const DataLayout &DL = F.getParent()->getDataLayout();
  CallingConv::ID CallConv = F.getCallingConv();

  if (!AMDGPUTargetMachine::EnableFixedFunctionABI &&
      CallConv != CallingConv::AMDGPU_Gfx) {
    LLVM_DEBUG(dbgs() << "Variable function ABI not implemented\n");
    return false;
  }

  if (AMDGPU::isShader(CallConv)) {
    LLVM_DEBUG(dbgs() << "Unhandled call from graphics shader\n");
    return false;
  }

  SmallVector<ArgInfo, 8> OutArgs;
  for (auto &OrigArg : Info.OrigArgs)
    splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);

  SmallVector<ArgInfo, 8> InArgs;
  if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy())
    splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);

  // If we can lower as a tail call, do that instead.
  bool CanTailCallOpt = false;

  // We must emit a tail call if we have musttail.
  if (Info.IsMustTailCall && !CanTailCallOpt) {
    LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
    return false;
  }

  // Find out which ABI gets to decide where things go.
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) =
      getAssignFnsForCC(Info.CallConv, TLI);

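  // Bracket the call with the stack-adjustment pseudos; the matching
  // ADJCALLSTACKDOWN is emitted once the call and its copies are in place.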
  MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP)
      .addImm(0)
      .addImm(0);

  // Create a temporarily-floating call instruction so we can add the implicit
  // uses of arg registers.
  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false);

  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.addDef(TRI->getReturnAddressReg(MF));

  if (!addCallTargetOperands(MIB, MIRBuilder, Info))
    return false;

  // Tell the call which registers are clobbered.
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, Info.CallConv);
  MIB.addRegMask(Mask);

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext());

  // We could pass MIB and directly add the implicit uses to the call
  // now. However, as an aesthetic choice, place implicit argument operands
  // after the ordinary user argument registers.
  SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;

  if (AMDGPUTargetMachine::EnableFixedFunctionABI ||
      Info.CallConv != CallingConv::AMDGPU_Gfx) {
    // With a fixed ABI, allocate fixed registers before user arguments.
    if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
      return false;
  }

  // Do the actual argument marshalling.
  SmallVector<Register, 8> PhysRegs;
  AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
                                   AssignFnVarArg, false);
  if (!handleAssignments(CCInfo, ArgLocs, MIRBuilder, OutArgs, Handler))
    return false;

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  if (!ST.enableFlatScratch()) {
    // Insert copies for the SRD. In the HSA case, this should be an identity
    // copy.
    auto ScratchRSrcReg = MIRBuilder.buildCopy(LLT::vector(4, 32),
                                               MFI->getScratchRSrcReg());
    MIRBuilder.buildCopy(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
    MIB.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Implicit);
  }

  for (std::pair<MCRegister, Register> ArgReg : ImplicitArgRegs) {
    MIRBuilder.buildCopy((Register)ArgReg.first, ArgReg.second);
    MIB.addReg(ArgReg.first, RegState::Implicit);
  }

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // If Callee is a reg, since it is used by a target specific
  // instruction, it must have a register class matching the
  // constraint of that instruction.

  // FIXME: We should define regbankselectable call instructions to handle
  // divergent call targets.
  if (MIB->getOperand(1).isReg()) {
    MIB->getOperand(1).setReg(constrainOperandRegClass(
        MF, *TRI, MRI, *ST.getInstrInfo(),
        *ST.getRegBankInfo(), *MIB, MIB->getDesc(), MIB->getOperand(1),
        1));
  }

  // Now we can add the actual call instruction to the correct position.
  MIRBuilder.insertInstr(MIB);

  // Finally we can copy the returned value back into its virtual-register. In
  // symmetry with the arguments, the physical register must be an
  // implicit-define of the call instruction.
  if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
    CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv,
                                                      Info.IsVarArg);
    CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);
    if (!handleAssignments(MIRBuilder, InArgs, Handler, Info.CallConv,
                           Info.IsVarArg))
      return false;
  }

  uint64_t CalleePopBytes = NumBytes;

  MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN)
      .addImm(0)
      .addImm(CalleePopBytes);

  if (!Info.CanLowerReturn) {
    insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
                    Info.DemoteRegister, Info.DemoteStackIndex);
  }

  return true;
}
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition: CallingConvLower.h:153
llvm::MachineRegisterInfo::addLiveIn
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
Definition: MachineRegisterInfo.h:929
i
i
Definition: README.txt:29
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:158
llvm::AMDGPUFunctionArgInfo::PreloadedValue
PreloadedValue
Definition: AMDGPUArgumentUsageInfo.h:98
llvm::SIMachineFunctionInfo::setIfReturnsVoid
void setIfReturnsVoid(bool Value)
Definition: SIMachineFunctionInfo.h:836
llvm::AMDGPUFunctionArgInfo::QUEUE_PTR
@ QUEUE_PTR
Definition: AMDGPUArgumentUsageInfo.h:102
llvm::AMDGPUTargetMachine::EnableFixedFunctionABI
static bool EnableFixedFunctionABI
Definition: AMDGPUTargetMachine.h:37
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:132
llvm
Definition: AllocatorList.h:23
llvm::MachineIRBuilder::buildGlobalValue
MachineInstrBuilder buildGlobalValue(const DstOp &Res, const GlobalValue *GV)
Build and insert Res = G_GLOBAL_VALUE GV.
Definition: MachineIRBuilder.cpp:146
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::MachineIRBuilder::buildOr
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_OR Op0, Op1.
Definition: MachineIRBuilder.h:1528
llvm::EVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:144
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::SIMachineFunctionInfo::getPreloadedReg
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
Definition: SIMachineFunctionInfo.h:692
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
llvm::AMDGPUTargetLowering
Definition: AMDGPUISelLowering.h:27
llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition: CallingConvLower.h:191
SIMachineFunctionInfo.h
llvm::ArgDescriptor::createArg
static constexpr ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)
Definition: AMDGPUArgumentUsageInfo.h:54
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::SITargetLowering::allocateSystemSGPRs
void allocateSystemSGPRs(CCState &CCInfo, MachineFunction &MF, SIMachineFunctionInfo &Info, CallingConv::ID CallConv, bool IsShader) const
Definition: SIISelLowering.cpp:2064
llvm::ArgDescriptor
Definition: AMDGPUArgumentUsageInfo.h:23
llvm::Function
Definition: Function.h:61
allocateHSAUserSGPRs
static void allocateHSAUserSGPRs(CCState &CCInfo, MachineIRBuilder &B, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info)
Definition: AMDGPUCallLowering.cpp:443
llvm::AMDGPUCallLowering::AMDGPUCallLowering
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
Definition: AMDGPUCallLowering.cpp:257
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::SIMachineFunctionInfo::getArgInfo
AMDGPUFunctionArgInfo & getArgInfo()
Definition: SIMachineFunctionInfo.h:679
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:430
llvm::CallLowering::ValueHandler::extendRegister
Register extendRegister(Register ValReg, CCValAssign &VA, unsigned MaxSizeBits=0)
Extend a register to the location type given in VA, capped at extending to at most MaxSize bits.
Definition: CallLowering.cpp:1017
llvm::MachineRegisterInfo::getTargetRegisterInfo
const TargetRegisterInfo * getTargetRegisterInfo() const
Definition: MachineRegisterInfo.h:153
llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition: MachineMemOperand.h:144
llvm::AMDGPUArgumentUsageInfo::FixedABIFunctionInfo
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo
Definition: AMDGPUArgumentUsageInfo.h:166
llvm::CallLowering::ValueHandler
Argument handling is mostly uniform between the four places that make these decisions: function forma...
Definition: CallLowering.h:145
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:717
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:312
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::CallLowering::OutgoingValueHandler
Definition: CallLowering.h:244
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition: MachineMemOperand.h:142
llvm::MachineRegisterInfo::getLiveInVirtReg
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in physical ...
Definition: MachineRegisterInfo.cpp:454
addCallTargetOperands
static bool addCallTargetOperands(MachineInstrBuilder &CallInst, MachineIRBuilder &MIRBuilder, AMDGPUCallLowering::CallLoweringInfo &Info)
Definition: AMDGPUCallLowering.cpp:904
llvm::AMDGPUCallLowering::lowerCall
bool lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const override
This hook must be implemented to lower the given call instruction, including argument and return valu...
Definition: AMDGPUCallLowering.cpp:924
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::GCNSubtarget
Definition: GCNSubtarget.h:38
llvm::MachineIRBuilder::buildInstrNoInsert
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
Definition: MachineIRBuilder.cpp:40
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
MachineIRBuilder.h
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::CallingConv::AMDGPU_Gfx
@ AMDGPU_Gfx
Calling convention used for AMD graphics targets.
Definition: CallingConv.h:245
llvm::FunctionLoweringInfo::CanLowerReturn
bool CanLowerReturn
CanLowerReturn - true iff the function's return value can be lowered to registers.
Definition: FunctionLoweringInfo.h:63
llvm::ZB_Undefined
@ ZB_Undefined
The returned value is undefined.
Definition: MathExtras.h:47
llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition: GCNSubtarget.h:226
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
extOpcodeToISDExtOpcode
static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc)
Definition: AMDGPUCallLowering.cpp:262
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::AMDGPUFunctionArgInfo::WORKGROUP_ID_X
@ WORKGROUP_ID_X
Definition: AMDGPUArgumentUsageInfo.h:106
llvm::AMDGPULegalizerInfo
This class provides the information for the target register banks.
Definition: AMDGPULegalizerInfo.h:32
llvm::MachineIRBuilder::buildConstant
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Definition: MachineIRBuilder.cpp:255
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::ComputeValueVTs
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:124
llvm::AMDGPU::isKernel
LLVM_READNONE bool isKernel(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.h:699
llvm::getLLTForType
LLT getLLTForType(Type &Ty, const DataLayout &DL)
Construct a low-level type based on an LLVM type.
Definition: LowLevelType.cpp:21
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:205
llvm::SIMachineFunctionInfo::returnsVoid
bool returnsVoid() const
Definition: SIMachineFunctionInfo.h:832
llvm::MachineInstrBuilder::addDef
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Definition: MachineInstrBuilder.h:117
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:565
llvm::FunctionLoweringInfo::DemoteRegister
Register DemoteRegister
DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg allocated to hold a pointer to ...
Definition: FunctionLoweringInfo.h:70
llvm::AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR
@ KERNARG_SEGMENT_PTR
Definition: AMDGPUArgumentUsageInfo.h:103
llvm::AMDGPUFunctionArgInfo
Definition: AMDGPUArgumentUsageInfo.h:97
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:33
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1313
llvm::AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR
@ IMPLICIT_ARG_PTR
Definition: AMDGPUArgumentUsageInfo.h:111
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:714
llvm::MachineIRBuilder::buildShl
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Definition: MachineIRBuilder.h:1484
llvm::ISD::ArgFlagsTy::isByVal
bool isByVal() const
Definition: TargetCallingConv.h:88
FunctionLoweringInfo.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:653
llvm::AMDGPUFunctionArgInfo::WorkItemIDX
ArgDescriptor WorkItemIDX
Definition: AMDGPUArgumentUsageInfo.h:148
llvm::CallLowering::ArgInfo
Definition: CallLowering.h:61
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::LLT::getSizeInBytes
unsigned getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelTypeImpl.h:117
llvm::CallLowering::handleAssignments
bool handleAssignments(MachineIRBuilder &MIRBuilder, SmallVectorImpl< ArgInfo > &Args, ValueHandler &Handler, CallingConv::ID CallConv, bool IsVarArg, Register ThisReturnReg=Register()) const
Invoke Handler::assignArg on each of the given Args and then use Handler to move them to the assigned...
Definition: CallLowering.cpp:448
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AMDGPUFunctionArgInfo::WORKITEM_ID_Y
@ WORKITEM_ID_Y
Definition: AMDGPUArgumentUsageInfo.h:115
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:119
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::ISD::NodeType
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
llvm::AMDGPUCallLowering::lowerFormalArgumentsKernel
bool lowerFormalArgumentsKernel(MachineIRBuilder &B, const Function &F, ArrayRef< ArrayRef< Register >> VRegs) const
Definition: AMDGPUCallLowering.cpp:494
llvm::SIMachineFunctionInfo::getStackPtrOffsetReg
Register getStackPtrOffsetReg() const
Definition: SIMachineFunctionInfo.h:752
llvm::MachineIRBuilder::getMF
MachineFunction & getMF()
Getter for the function we currently build.
Definition: MachineIRBuilder.h:270
llvm::CallLowering::IncomingValueHandler
Definition: CallLowering.h:230
llvm::report_fatal_error
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::LLT::vector
static LLT vector(uint16_t NumElements, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelTypeImpl.h:58
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:26
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:29
llvm::AMDGPUCallLowering::passSpecialInputs
bool passSpecialInputs(MachineIRBuilder &MIRBuilder, CCState &CCInfo, SmallVectorImpl< std::pair< MCRegister, Register >> &ArgRegs, CallLoweringInfo &Info) const
Definition: AMDGPUCallLowering.cpp:744
llvm::RegState::Implicit
@ Implicit
Not emitted register (e.g. carry, or temporary result).
Definition: MachineInstrBuilder.h:47
llvm::CCValAssign::getLocInfo
LocInfo getLocInfo() const
Definition: CallingConvLower.h:155
llvm::CCValAssign::getLocMemOffset
unsigned getLocMemOffset() const
Definition: CallingConvLower.h:151
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::None
const NoneType None
Definition: None.h:23
llvm::EVT::getTypeForEVT
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:181
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MachineInstrBuilder::getReg
Register getReg(unsigned Idx) const
Get the register for the operand index.
Definition: MachineInstrBuilder.h:95
llvm::AMDGPUTargetLowering::getTypeForExtReturn
EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, ISD::NodeType ExtendKind) const override
Return the type that should be used to zero or sign extend a zeroext/signext integer return value.
Definition: AMDGPUISelLowering.cpp:686
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::SITargetLowering::allocateSpecialInputVGPRsFixed
void allocateSpecialInputVGPRsFixed(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
Allocate implicit function VGPR arguments in fixed registers.
Definition: SIISelLowering.cpp:1960
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1336
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:378
llvm::CCAssignFn
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
Definition: CallingConvLower.h:177
llvm::LLT::pointer
static LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelTypeImpl.h:50
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:555
llvm::AttributeList::ReturnIndex
@ ReturnIndex
Definition: Attributes.h:378
llvm::CCValAssign::LocInfo
LocInfo
Definition: CallingConvLower.h:35
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:211
llvm::AMDGPUFunctionArgInfo::getPreloadedValue
std::tuple< const ArgDescriptor *, const TargetRegisterClass *, LLT > getPreloadedValue(PreloadedValue Value) const
Definition: AMDGPUArgumentUsageInfo.cpp:89
llvm::MachineIRBuilder
Helper class to build MachineInstr.
Definition: MachineIRBuilder.h:220
llvm::AMDGPUFunctionArgInfo::WORKGROUP_ID_Z
@ WORKGROUP_ID_Z
Definition: AMDGPUArgumentUsageInfo.h:108
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:70
llvm::AMDGPUFunctionArgInfo::WORKITEM_ID_Z
@ WORKITEM_ID_Z
Definition: AMDGPUArgumentUsageInfo.h:116
llvm::CallLowering::checkReturn
bool checkReturn(CCState &CCInfo, SmallVectorImpl< BaseArgInfo > &Outs, CCAssignFn *Fn) const
Definition: CallLowering.cpp:822
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
llvm::AMDGPUTargetLowering::CCAssignFnForReturn
static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)
Definition: AMDGPUISelLowering.cpp:1147
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:37
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::AMDGPUFunctionArgInfo::WORKITEM_ID_X
@ WORKITEM_ID_X
Definition: AMDGPUArgumentUsageInfo.h:114
llvm::MachineIRBuilder::buildPtrAdd
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert Res = G_PTR_ADD Op0, Op1.
Definition: MachineIRBuilder.cpp:182
llvm::inferAlignFromPtrInfo
Align inferAlignFromPtrInfo(MachineFunction &MF, const MachinePointerInfo &MPO)
Definition: Utils.cpp:600
I
#define I(x, y, z)
Definition: MD5.cpp:59
Analysis.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SITargetLowering::allocateSpecialInputVGPRs
void allocateSpecialInputVGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
Allocate implicit function VGPR arguments at the end of allocated user arguments.
Definition: SIISelLowering.cpp:1939
llvm::FunctionLoweringInfo
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Definition: FunctionLoweringInfo.h:53
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:815
llvm::AMDGPUTargetLowering::CCAssignFnForCall
static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)
Selects the correct CCAssignFn for a given CallingConvention value.
Definition: AMDGPUISelLowering.cpp:1142
llvm::MachineRegisterInfo::createGenericVirtualRegister
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Definition: MachineRegisterInfo.cpp:188
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:30
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:98
llvm::MachineInstrBuilder::addUse
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Definition: MachineInstrBuilder.h:124
llvm::AMDGPUSubtarget::getExplicitKernelArgOffset
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
Definition: AMDGPUSubtarget.h:198
llvm::AMDGPUFunctionArgInfo::WorkItemIDZ
ArgDescriptor WorkItemIDZ
Definition: AMDGPUArgumentUsageInfo.h:150
llvm::ArgDescriptor::isRegister
bool isRegister() const
Definition: AMDGPUArgumentUsageInfo.h:67
llvm::MachineFunction
Definition: MachineFunction.h:227
llvm::MachineFunction::addLiveIn
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual register for it.
Definition: MachineFunction.cpp:634
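For example, a sketch in which the particular physical register and register class are illustrative assumptions:
// Expose an incoming SGPR to the function body as a virtual register.
Register VReg = MF.addLiveIn(AMDGPU::SGPR0, &AMDGPU::SReg_32RegClass);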
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1486
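For instance, with any random-access range such as a SmallVector:
SmallVector<int, 4> Vals = {1, 2, 3};
size_t N = llvm::size(Vals); // N == 3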
llvm::MachineIRBuilder::buildInstr
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert an instruction with the given Opcode and no initial operands; operands are appended via the returned MachineInstrBuilder.
Definition: MachineIRBuilder.h:375
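A sketch of appending operands through the returned builder; DstReg and SrcReg are assumed to exist in the caller:
// Build a COPY by hand rather than through the buildCopy convenience wrapper.
auto MIB = B.buildInstr(TargetOpcode::COPY)
               .addDef(DstReg)
               .addUse(SrcReg);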
llvm::AMDGPUFunctionArgInfo::DISPATCH_ID
@ DISPATCH_ID
Definition: AMDGPUArgumentUsageInfo.h:104
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition: APInt.h:32
llvm::CCState::AllocateReg
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
Definition: CallingConvLower.h:351
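A hedged sketch of typical use inside a CCAssignFn; the candidate list is an assumption for illustration:
// Try each candidate in order; an invalid MCRegister signals exhaustion.
static const MCPhysReg Candidates[] = {AMDGPU::VGPR0, AMDGPU::VGPR1};
if (MCRegister Reg = State.AllocateReg(Candidates)) {
  // Reg was free and is now marked as allocated.
}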
llvm::countTrailingZeros
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition: MathExtras.h:157
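For instance, recovering the shift amount implied by a contiguous bit mask:
uint32_t Mask = 0x3FF0;                          // bits [13:4] set
unsigned Shift = llvm::countTrailingZeros(Mask); // Shift == 4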
AMDGPU.h
llvm::MachineIRBuilder::insertInstr
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
Definition: MachineIRBuilder.cpp:45
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::CallLowering::insertSRetLoads
void insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, ArrayRef< Register > VRegs, Register DemoteReg, int FI) const
Load the returned value from the stack into virtual registers in VRegs.
Definition: CallLowering.cpp:719
llvm::MachineIRBuilder::buildCopy
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
Definition: MachineIRBuilder.cpp:238
llvm::ArgDescriptor::isMasked
bool isMasked() const
Definition: AMDGPUArgumentUsageInfo.h:85
uint32_t
llvm::ISD::ArgFlagsTy
Definition: TargetCallingConv.h:27
llvm::AMDGPU::isGraphics
bool isGraphics(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1328
getAssignFnsForCC
static std::pair< CCAssignFn *, CCAssignFn * > getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI)
Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for CC.
Definition: AMDGPUCallLowering.cpp:894
llvm::SIMachineFunctionInfo::getScratchRSrcReg
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
Definition: SIMachineFunctionInfo.h:725
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition: MachineMemOperand.h:134
llvm::MachineIRBuilder::buildAnyExt
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op.
Definition: MachineIRBuilder.cpp:416
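A sketch of the common widening pattern; B and Src16 are assumptions:
// Widen an s16 value so it can occupy a 32-bit location; G_ANYEXT leaves
// the high bits undefined, which is all that is needed here.
Register Wide = B.buildAnyExt(LLT::scalar(32), Src16).getReg(0);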
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineBasicBlock::addLiveIn
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
Definition: MachineBasicBlock.h:367
llvm::CallLowering::insertSRetIncomingArgument
void insertSRetIncomingArgument(const Function &F, SmallVectorImpl< ArgInfo > &SplitArgs, Register &DemoteReg, MachineRegisterInfo &MRI, const DataLayout &DL) const
Insert the hidden sret ArgInfo at the beginning of SplitArgs.
Definition: CallLowering.cpp:780
llvm::CallLowering::unpackRegs
void unpackRegs(ArrayRef< Register > DstRegs, Register SrcReg, Type *PackedTy, MachineIRBuilder &MIRBuilder) const
Generate instructions for unpacking SrcReg into the DstRegs corresponding to the aggregate type PackedTy.
Definition: CallLowering.cpp:228
llvm::SITargetLowering::allocateSpecialEntryInputVGPRs
void allocateSpecialEntryInputVGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
Definition: SIISelLowering.cpp:1817
llvm::CallLowering::insertSRetStores
void insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, ArrayRef< Register > VRegs, Register DemoteReg) const
Store the return value given by VRegs onto the stack, starting at the offset specified in DemoteReg.
Definition: CallLowering.cpp:749
llvm::commonAlignment
Align commonAlignment(Align A, Align B)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:221
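Because the result must satisfy both constraints, it is the smaller of the two; for example:
Align A = commonAlignment(Align(16), Align(4)); // A == Align(4)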
llvm::AMDGPUFunctionArgInfo::WORKGROUP_ID_Y
@ WORKGROUP_ID_Y
Definition: AMDGPUArgumentUsageInfo.h:107
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:521
llvm::TypeSize
Definition: TypeSize.h:417
llvm::SITargetLowering
Definition: SIISelLowering.h:30
llvm::CCState::getNextStackOffset
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment requirements.
Definition: CallingConvLower.h:264
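A small sketch, assuming CCInfo has already analyzed the argument list:
// Total stack space occupied by the assigned stack arguments.
unsigned StackSize = CCInfo.getNextStackOffset();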
llvm::CallLowering::CallLoweringInfo
Definition: CallLowering.h:95
llvm::CallLowering::ValueHandler::MIRBuilder
MachineIRBuilder & MIRBuilder
Definition: CallLowering.h:221
llvm::AMDGPUFunctionArgInfo::DISPATCH_PTR
@ DISPATCH_PTR
Definition: AMDGPUArgumentUsageInfo.h:101
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition: MachineMemOperand.h:136
llvm::GlobalValue::getAddressSpace
unsigned getAddressSpace() const
Definition: Globals.cpp:112
AMDGPULegalizerInfo.h
llvm::CallLowering::splitToValueTypes
void splitToValueTypes(const ArgInfo &OrigArgInfo, SmallVectorImpl< ArgInfo > &SplitArgs, const DataLayout &DL, CallingConv::ID CallConv) const
Break OrigArgInfo into one or more pieces the calling convention can process, returned in SplitArgs.
Definition: CallLowering.cpp:190
AMDGPUCallLowering.h
llvm::MachinePointerInfo::getFixedStack
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Definition: MachineOperand.cpp:995
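A hedged sketch pairing this with a G_LOAD; MF, B, FI, AddrReg, and the 4-byte size are assumptions:
// Describe a 4-byte load from frame index FI so later passes can reason
// about the access, then emit the load itself.
MachinePointerInfo MPO = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
    MPO, MachineMemOperand::MOLoad, 4, Align(4));
auto Load = B.buildLoad(LLT::scalar(32), AddrReg, *MMO);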
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
llvm::MachineRegisterInfo::getType
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Definition: MachineRegisterInfo.h:732
llvm::AMDGPUFunctionArgInfo::WorkItemIDY
ArgDescriptor WorkItemIDY
Definition: AMDGPUArgumentUsageInfo.h:149
llvm::ArgDescriptor::getRegister
MCRegister getRegister() const
Definition: AMDGPUArgumentUsageInfo.h:71
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
Definition: SIMachineFunctionInfo.h:331
llvm::constrainOperandRegClass
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed as an argument (RegClass).
Definition: Utils.cpp:48
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:350
llvm::MachineFunction::getDataLayout
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Definition: MachineFunction.cpp:260
llvm::MachineBasicBlock::empty
bool empty() const
Definition: MachineBasicBlock.h:240
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: APFloat.h:43
llvm::AMDGPUCallLowering::lowerFormalArguments
bool lowerFormalArguments(MachineIRBuilder &B, const Function &F, ArrayRef< ArrayRef< Register >> VRegs, FunctionLoweringInfo &FLI) const override
This hook must be implemented to lower the incoming (formal) arguments, described by VRegs, for GlobalISel.
Definition: AMDGPUCallLowering.cpp:573
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1450
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:376
llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:205
llvm::MachineIRBuilder::buildStore
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
Definition: MachineIRBuilder.cpp:388
getCallOpcode
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall)
Definition: AMDGPUCallLowering.cpp:898
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:711
llvm::SITargetLowering::allocateSpecialInputSGPRs
void allocateSpecialInputSGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
Definition: SIISelLowering.cpp:1973
llvm::LLT::scalar
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelTypeImpl.h:43
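For example:
LLT S32 = LLT::scalar(32);    // 32-bit "bag of bits", no sign or FP meaning
LLT P0 = LLT::pointer(0, 64); // 64-bit pointer in address space 0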
llvm::CallLowering
Definition: CallLowering.h:43
llvm::AMDGPUCallLowering::lowerReturn
bool lowerReturn(MachineIRBuilder &B, const Value *Val, ArrayRef< Register > VRegs, FunctionLoweringInfo &FLI) const override
This hook behaves as the extended lowerReturn function, but for targets that do not support swifterror value promotion.
Definition: AMDGPUCallLowering.cpp:354
llvm::MachinePointerInfo::getStack
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
Definition: MachineOperand.cpp:1008
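A hedged sketch combining this with buildStore above; MF, B, ValReg, AddrReg, and the 4-byte size are assumptions:
// Describe and emit a store of an outgoing argument at [sp + Offset].
MachinePointerInfo MPO = MachinePointerInfo::getStack(MF, /*Offset=*/0);
MachineMemOperand *MMO = MF.getMachineMemOperand(
    MPO, MachineMemOperand::MOStore, 4, Align(4));
B.buildStore(ValReg, AddrReg, *MMO);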
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::AttributeList::FirstArgIndex
@ FirstArgIndex
Definition: Attributes.h:380
SIRegisterInfo.h
llvm::TargetRegisterInfo::getCallPreservedMask
virtual const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const
Return a mask of call-preserved registers for the given calling convention on the current function.
Definition: TargetRegisterInfo.h:471
AMDGPUTargetMachine.h
llvm::LLT
Definition: LowLevelTypeImpl.h:40
llvm::CallLowering::setArgFlags
void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const
Definition: CallLowering.cpp:150