LLVM 17.0.0git
ARMCallingConv.cpp
Go to the documentation of this file.
1//=== ARMCallingConv.cpp - ARM Custom CC Routines ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the custom routines for the ARM Calling Convention that
10// aren't done by tablegen, and includes the table generated implementations.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARM.h"
15#include "ARMCallingConv.h"
16#include "ARMSubtarget.h"
17#include "ARMRegisterInfo.h"
18using namespace llvm;
19
20// APCS f64 is in register pairs, possibly split to stack
21static bool f64AssignAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
23 CCState &State, bool CanFail) {
24 static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
25
26 // Try to get the first register.
27 if (unsigned Reg = State.AllocateReg(RegList))
28 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
29 else {
30 // For the 2nd half of a v2f64, do not fail.
31 if (CanFail)
32 return false;
33
34 // Put the whole thing on the stack.
36 ValNo, ValVT, State.AllocateStack(8, Align(4)), LocVT, LocInfo));
37 return true;
38 }
39
40 // Try to get the second register.
41 if (unsigned Reg = State.AllocateReg(RegList))
42 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
43 else
45 ValNo, ValVT, State.AllocateStack(4, Align(4)), LocVT, LocInfo));
46 return true;
47}
48
49static bool CC_ARM_APCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,
51 ISD::ArgFlagsTy ArgFlags,
52 CCState &State) {
53 if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
54 return false;
55 if (LocVT == MVT::v2f64 &&
56 !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
57 return false;
58 return true; // we handled it
59}
60
61// AAPCS f64 is in aligned register pairs
62static bool f64AssignAAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
64 CCState &State, bool CanFail) {
65 static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
66 static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
67 static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 };
68 static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
69
70 unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList);
71 if (Reg == 0) {
72
73 // If we had R3 unallocated only, now we still must to waste it.
74 Reg = State.AllocateReg(GPRArgRegs);
75 assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64");
76
77 // For the 2nd half of a v2f64, do not just fail.
78 if (CanFail)
79 return false;
80
81 // Put the whole thing on the stack.
83 ValNo, ValVT, State.AllocateStack(8, Align(8)), LocVT, LocInfo));
84 return true;
85 }
86
87 unsigned i;
88 for (i = 0; i < 2; ++i)
89 if (HiRegList[i] == Reg)
90 break;
91
92 unsigned T = State.AllocateReg(LoRegList[i]);
93 (void)T;
94 assert(T == LoRegList[i] && "Could not allocate register");
95
96 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
97 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
98 LocVT, LocInfo));
99 return true;
100}
101
102static bool CC_ARM_AAPCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,
103 CCValAssign::LocInfo LocInfo,
104 ISD::ArgFlagsTy ArgFlags,
105 CCState &State) {
106 if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
107 return false;
108 if (LocVT == MVT::v2f64 &&
109 !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
110 return false;
111 return true; // we handled it
112}
113
114static bool f64RetAssign(unsigned ValNo, MVT ValVT, MVT LocVT,
115 CCValAssign::LocInfo LocInfo, CCState &State) {
116 static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
117 static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
118
119 unsigned Reg = State.AllocateReg(HiRegList, LoRegList);
120 if (Reg == 0)
121 return false; // we didn't handle it
122
123 unsigned i;
124 for (i = 0; i < 2; ++i)
125 if (HiRegList[i] == Reg)
126 break;
127
128 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
129 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
130 LocVT, LocInfo));
131 return true;
132}
133
134static bool RetCC_ARM_APCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,
135 CCValAssign::LocInfo LocInfo,
136 ISD::ArgFlagsTy ArgFlags,
137 CCState &State) {
138 if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
139 return false;
140 if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
141 return false;
142 return true; // we handled it
143}
144
145static bool RetCC_ARM_AAPCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,
146 CCValAssign::LocInfo LocInfo,
147 ISD::ArgFlagsTy ArgFlags,
148 CCState &State) {
149 return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
150 State);
151}
152
153static const MCPhysReg RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
154
155static const MCPhysReg SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
156 ARM::S4, ARM::S5, ARM::S6, ARM::S7,
157 ARM::S8, ARM::S9, ARM::S10, ARM::S11,
158 ARM::S12, ARM::S13, ARM::S14, ARM::S15 };
159static const MCPhysReg DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
160 ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
161static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
162
163
164// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
165// has InConsecutiveRegs set, and that the last member also has
166// InConsecutiveRegsLast set. We must process all members of the HA before
167// we can allocate it, as we need to know the total number of registers that
168// will be needed in order to (attempt to) allocate a contiguous block.
169static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT,
170 MVT LocVT,
171 CCValAssign::LocInfo LocInfo,
172 ISD::ArgFlagsTy ArgFlags,
173 CCState &State) {
174 SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
175
176 // AAPCS HFAs must have 1-4 elements, all of the same type
177 if (PendingMembers.size() > 0)
178 assert(PendingMembers[0].getLocVT() == LocVT);
179
180 // Add the argument to the list to be allocated once we know the size of the
181 // aggregate. Store the type's required alignment as extra info for later: in
182 // the [N x i64] case all trace has been removed by the time we actually get
183 // to do allocation.
184 PendingMembers.push_back(CCValAssign::getPending(
185 ValNo, ValVT, LocVT, LocInfo, ArgFlags.getNonZeroOrigAlign().value()));
186
187 if (!ArgFlags.isInConsecutiveRegsLast())
188 return true;
189
190 // Try to allocate a contiguous block of registers, each of the correct
191 // size to hold one member.
192 auto &DL = State.getMachineFunction().getDataLayout();
193 const Align StackAlign = DL.getStackAlignment();
194 const Align FirstMemberAlign(PendingMembers[0].getExtraInfo());
195 Align Alignment = std::min(FirstMemberAlign, StackAlign);
196
197 ArrayRef<MCPhysReg> RegList;
198 switch (LocVT.SimpleTy) {
199 case MVT::i32: {
200 RegList = RRegList;
201 unsigned RegIdx = State.getFirstUnallocated(RegList);
202
203 // First consume all registers that would give an unaligned object. Whether
204 // we go on stack or in regs, no-one will be using them in future.
205 unsigned RegAlign = alignTo(Alignment.value(), 4) / 4;
206 while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())
207 State.AllocateReg(RegList[RegIdx++]);
208
209 break;
210 }
211 case MVT::f16:
212 case MVT::bf16:
213 case MVT::f32:
214 RegList = SRegList;
215 break;
216 case MVT::v4f16:
217 case MVT::v4bf16:
218 case MVT::f64:
219 RegList = DRegList;
220 break;
221 case MVT::v8f16:
222 case MVT::v8bf16:
223 case MVT::v2f64:
224 RegList = QRegList;
225 break;
226 default:
227 llvm_unreachable("Unexpected member type for block aggregate");
228 break;
229 }
230
231 unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
232 if (RegResult) {
233 for (CCValAssign &PendingMember : PendingMembers) {
234 PendingMember.convertToReg(RegResult);
235 State.addLoc(PendingMember);
236 ++RegResult;
237 }
238 PendingMembers.clear();
239 return true;
240 }
241
242 // Register allocation failed, we'll be needing the stack
243 unsigned Size = LocVT.getSizeInBits() / 8;
244 if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) {
245 // If nothing else has used the stack until this point, a non-HFA aggregate
246 // can be split between regs and stack.
247 unsigned RegIdx = State.getFirstUnallocated(RegList);
248 for (auto &It : PendingMembers) {
249 if (RegIdx >= RegList.size())
250 It.convertToMem(State.AllocateStack(Size, Align(Size)));
251 else
252 It.convertToReg(State.AllocateReg(RegList[RegIdx++]));
253
254 State.addLoc(It);
255 }
256 PendingMembers.clear();
257 return true;
258 }
259
260 if (LocVT != MVT::i32)
261 RegList = SRegList;
262
263 // Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core)
264 for (auto Reg : RegList)
265 State.AllocateReg(Reg);
266
267 // Clamp the alignment between 4 and 8.
269 Alignment = ArgFlags.getNonZeroMemAlign() <= 4 ? Align(4) : Align(8);
270
271 // After the first item has been allocated, the rest are packed as tightly as
272 // possible. (E.g. an incoming i64 would have starting Align of 8, but we'll
273 // be allocating a bunch of i32 slots).
274 for (auto &It : PendingMembers) {
275 It.convertToMem(State.AllocateStack(Size, Alignment));
276 State.addLoc(It);
277 Alignment = Align(1);
278 }
279
280 // All pending members have now been allocated
281 PendingMembers.clear();
282
283 // This will be allocated by the last member of the aggregate
284 return true;
285}
286
287static bool CustomAssignInRegList(unsigned ValNo, MVT ValVT, MVT LocVT,
288 CCValAssign::LocInfo LocInfo, CCState &State,
289 ArrayRef<MCPhysReg> RegList) {
290 unsigned Reg = State.AllocateReg(RegList);
291 if (Reg) {
292 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
293 return true;
294 }
295 return false;
296}
297
298static bool CC_ARM_AAPCS_Custom_f16(unsigned ValNo, MVT ValVT, MVT LocVT,
299 CCValAssign::LocInfo LocInfo,
300 ISD::ArgFlagsTy ArgFlags, CCState &State) {
301 // f16 arguments are extended to i32 and assigned to a register in [r0, r3]
302 return CustomAssignInRegList(ValNo, ValVT, MVT::i32, LocInfo, State,
303 RRegList);
304}
305
306static bool CC_ARM_AAPCS_VFP_Custom_f16(unsigned ValNo, MVT ValVT, MVT LocVT,
307 CCValAssign::LocInfo LocInfo,
308 ISD::ArgFlagsTy ArgFlags,
309 CCState &State) {
310 // f16 arguments are extended to f32 and assigned to a register in [s0, s15]
311 return CustomAssignInRegList(ValNo, ValVT, MVT::f32, LocInfo, State,
312 SRegList);
313}
314
315// Include the table generated calling convention implementations.
316#include "ARMGenCallingConv.inc"
static const MCPhysReg GPRArgRegs[]
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool CC_ARM_AAPCS_VFP_Custom_f16(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static bool f64RetAssign(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, CCState &State)
static const MCPhysReg RRegList[]
static bool RetCC_ARM_AAPCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static const MCPhysReg SRegList[]
static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static bool f64AssignAAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, CCState &State, bool CanFail)
static const MCPhysReg DRegList[]
static bool RetCC_ARM_APCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static bool CC_ARM_AAPCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static bool f64AssignAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, CCState &State, bool CanFail)
static const MCPhysReg QRegList[]
static bool CC_ARM_AAPCS_Custom_f16(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static bool CC_ARM_APCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static bool CustomAssignInRegList(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, CCState &State, ArrayRef< MCPhysReg > RegList)
uint64_t Size
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isTargetAEABI() const
Definition: ARMSubtarget.h:382
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
unsigned AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
MCPhysReg AllocateRegBlock(ArrayRef< MCPhysReg > Regs, unsigned RegsRequired)
AllocateRegBlock - Attempt to allocate a block of RegsRequired consecutive registers.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
Machine Value Type.
SimpleValueType SimpleTy
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Align getNonZeroOrigAlign() const
bool isInConsecutiveRegsLast() const
Align getNonZeroMemAlign() const