LLVM 17.0.0git
AArch64Subtarget.cpp
Go to the documentation of this file.
1//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64 specific subclass of TargetSubtarget.
10//
11//===----------------------------------------------------------------------===//
12
#include "AArch64Subtarget.h"

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64PBQPRegAlloc.h"
#include "AArch64TargetMachine.h"
#include "GISel/AArch64CallLowering.h"
#include "GISel/AArch64LegalizerInfo.h"
#include "GISel/AArch64RegisterBankInfo.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/CommandLine.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-subtarget"
32
33#define GET_SUBTARGETINFO_CTOR
34#define GET_SUBTARGETINFO_TARGET_DESC
35#include "AArch64GenSubtargetInfo.inc"
36
37static cl::opt<bool>
38EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
39 "converter pass"), cl::init(true), cl::Hidden);
40
41// If OS supports TBI, use this flag to enable it.
42static cl::opt<bool>
43UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
44 "an address is ignored"), cl::init(false), cl::Hidden);
45
46static cl::opt<bool>
47 UseNonLazyBind("aarch64-enable-nonlazybind",
48 cl::desc("Call nonlazybind functions via direct GOT load"),
49 cl::init(false), cl::Hidden);
50
51static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
52 cl::desc("Enable the use of AA during codegen."));
53
55 "aarch64-insert-extract-base-cost",
56 cl::desc("Base cost of vector insert/extract element"), cl::Hidden);
57
58// Reserve a list of X# registers, so they are unavailable for register
59// allocator, but can still be used as ABI requests, such as passing arguments
60// to function call.
62ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
63 "registers, so they can't be used by register allocator. "
64 "Should only be used for testing register allocator."),
66
67static cl::opt<bool>
68 ForceStreamingCompatibleSVE("force-streaming-compatible-sve",
69 cl::init(false), cl::Hidden);
70
72 if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
75}
76
77AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
78 StringRef FS, StringRef CPUString, StringRef TuneCPUString) {
79 // Determine default and user-specified characteristics
80
81 if (CPUString.empty())
82 CPUString = "generic";
83
84 if (TuneCPUString.empty())
85 TuneCPUString = CPUString;
86
87 ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
88 initializeProperties();
89
90 return *this;
91}
92
// NOTE(review): this listing is a Doxygen text extraction. The integer fused
// onto the start of each line is the original source line number, and the
// gaps in that numbering show that many per-CPU property assignments
// (MaxInterleaveFactor, alignment/jump-table settings, etc.) were dropped by
// the extraction. The surviving tokens are preserved byte-for-byte below;
// restore the missing assignments from upstream LLVM before compiling.
93void AArch64Subtarget::initializeProperties() {
94 // Initialize CPU specific properties. We should add a tablegen feature for
95 // this in the future so we can specify it together with the subtarget
96 // features.
97 switch (ARMProcFamily) {
98 case Others:
99 break;
100 case Carmel:
101 CacheLineSize = 64;
102 break;
103 case CortexA35:
104 case CortexA53:
105 case CortexA55:
109 break;
110 case CortexA57:
115 break;
116 case CortexA65:
118 break;
119 case CortexA72:
120 case CortexA73:
121 case CortexA75:
125 break;
126 case CortexA76:
127 case CortexA77:
128 case CortexA78:
129 case CortexA78C:
130 case CortexR82:
131 case CortexX1:
132 case CortexX1C:
136 break;
137 case CortexA510:
139 VScaleForTuning = 1;
142 break;
143 case CortexA710:
144 case CortexA715:
145 case CortexX2:
146 case CortexX3:
148 VScaleForTuning = 1;
151 break;
152 case A64FX:
153 CacheLineSize = 256;
157 PrefetchDistance = 128;
158 MinPrefetchStride = 1024;
160 VScaleForTuning = 4;
161 break;
162 case AppleA7:
163 case AppleA10:
164 case AppleA11:
165 case AppleA12:
166 case AppleA13:
167 case AppleA14:
168 case AppleA15:
169 case AppleA16:
170 CacheLineSize = 64;
171 PrefetchDistance = 280;
172 MinPrefetchStride = 2048;
// Inner switch: additional tuning applied only to the newer Apple cores
// (the assignment on the elided line 178 is missing here).
174 switch (ARMProcFamily) {
175 case AppleA14:
176 case AppleA15:
177 case AppleA16:
179 break;
180 default:
181 break;
182 }
183 break;
184 case ExynosM3:
186 MaxJumpTableSize = 20;
189 break;
190 case Falkor:
192 // FIXME: remove this to enable 64-bit SLP if performance looks good.
194 CacheLineSize = 128;
195 PrefetchDistance = 820;
196 MinPrefetchStride = 2048;
198 break;
199 case Kryo:
202 CacheLineSize = 128;
203 PrefetchDistance = 740;
204 MinPrefetchStride = 1024;
206 // FIXME: remove this to enable 64-bit SLP if performance looks good.
208 break;
209 case NeoverseE1:
211 break;
212 case NeoverseN1:
216 break;
217 case NeoverseN2:
218 case NeoverseV2:
222 VScaleForTuning = 1;
223 break;
224 case NeoverseV1:
228 VScaleForTuning = 2;
229 break;
230 case Neoverse512TVB:
232 VScaleForTuning = 1;
234 break;
235 case Saphira:
237 // FIXME: remove this to enable 64-bit SLP if performance looks good.
239 break;
240 case ThunderX2T99:
241 CacheLineSize = 64;
245 PrefetchDistance = 128;
246 MinPrefetchStride = 1024;
248 // FIXME: remove this to enable 64-bit SLP if performance looks good.
250 break;
251 case ThunderX:
252 case ThunderXT88:
253 case ThunderXT81:
254 case ThunderXT83:
255 CacheLineSize = 128;
258 // FIXME: remove this to enable 64-bit SLP if performance looks good.
260 break;
261 case TSV110:
262 CacheLineSize = 64;
265 break;
266 case ThunderX3T110:
267 CacheLineSize = 64;
271 PrefetchDistance = 128;
272 MinPrefetchStride = 1024;
274 // FIXME: remove this to enable 64-bit SLP if performance looks good.
276 break;
277 case Ampere1:
278 case Ampere1A:
279 CacheLineSize = 64;
283 break;
284 }
285}
286
288 StringRef TuneCPU, StringRef FS,
289 const TargetMachine &TM, bool LittleEndian,
290 unsigned MinSVEVectorSizeInBitsOverride,
291 unsigned MaxSVEVectorSizeInBitsOverride,
292 bool StreamingSVEModeDisabled)
293 : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
294 ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
295 ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
296 CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
297 IsLittle(LittleEndian),
298 StreamingSVEModeDisabled(StreamingSVEModeDisabled),
299 MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
300 MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
301 InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)),
302 TLInfo(TM, *this) {
305
308 Legalizer.reset(new AArch64LegalizerInfo(*this));
309
310 auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
311
312 // FIXME: At this point, we can't rely on Subtarget having RBI.
313 // It's awkward to mix passing RBI and the Subtarget; should we pass
314 // TII/TRI as well?
316 *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
317
318 RegBankInfo.reset(RBI);
319
320 auto TRI = getRegisterInfo();
321 StringSet<> ReservedRegNames;
322 ReservedRegNames.insert(ReservedRegsForRA.begin(), ReservedRegsForRA.end());
323 for (unsigned i = 0; i < 29; ++i) {
324 if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i)))
326 }
327 // X30 is named LR, so we can't use TRI->getName to check X30.
328 if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR"))
330 // X29 is named FP, so we can't use TRI->getName to check X29.
331 if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
333}
334
336 return CallLoweringInfo.get();
337}
338
340 return InlineAsmLoweringInfo.get();
341}
342
344 return InstSelector.get();
345}
346
348 return Legalizer.get();
349}
350
352 return RegBankInfo.get();
353}
354
355/// Find the target operand flags that describe how a global value should be
356/// referenced for the current subtarget.
357unsigned
359 const TargetMachine &TM) const {
360 // MachO large model always goes via a GOT, simply to get a single 8-byte
361 // absolute relocation on all global addresses.
362 if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
363 return AArch64II::MO_GOT;
364
365 // All globals dynamically protected by MTE must have their address tags
366 // synthesized. This is done by having the loader stash the tag in the GOT
367 // entry. Force all tagged globals (even ones with internal linkage) through
368 // the GOT.
369 if (GV->isTagged())
370 return AArch64II::MO_GOT;
371
372 if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
373 if (GV->hasDLLImportStorageClass()) {
377 }
378 if (getTargetTriple().isOSWindows())
380 return AArch64II::MO_GOT;
381 }
382
383 // The small code model's direct accesses use ADRP, which cannot
384 // necessarily produce the value 0 (if the code is above 4GB).
385 // Same for the tiny code model, where we have a pc relative LDR.
386 if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
388 return AArch64II::MO_GOT;
389
390 // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
391 // that their nominal addresses are tagged and outside of the code model. In
392 // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
393 // tag if necessary based on MO_TAGGED.
394 if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
396
398}
399
401 const GlobalValue *GV, const TargetMachine &TM) const {
402 // MachO large model always goes via a GOT, because we don't have the
403 // relocations available to do anything else..
404 if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
405 !GV->hasInternalLinkage())
406 return AArch64II::MO_GOT;
407
408 // NonLazyBind goes via GOT unless we know it's available locally.
409 auto *F = dyn_cast<Function>(GV);
410 if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
411 !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
412 return AArch64II::MO_GOT;
413
414 if (getTargetTriple().isOSWindows()) {
415 if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy() &&
417 // On Arm64EC, if we're calling a function directly, use MO_DLLIMPORT,
418 // not MO_DLLIMPORTAUX.
420 }
421
422 // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
423 return ClassifyGlobalReference(GV, TM);
424 }
425
427}
428
430 unsigned NumRegionInstrs) const {
431 // LNT run (at least on Cyclone) showed reasonably significant gains for
432 // bi-directional scheduling. 253.perlbmk.
433 Policy.OnlyTopDown = false;
434 Policy.OnlyBottomUp = false;
435 // Enabling or Disabling the latency heuristic is a close call: It seems to
436 // help nearly no benchmark on out-of-order architectures, on the other hand
437 // it regresses register pressure on a few benchmarking.
438 Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
439}
440
443}
444
447 return false;
448
450 return true;
451 if (TargetTriple.isiOS()) {
453 }
454
455 return false;
456}
457
458std::unique_ptr<PBQPRAConstraint>
460 return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
461}
462
464 // We usually compute max call frame size after ISel. Do the computation now
465 // if the .mir file didn't specify it. Note that this will probably give you
466 // bogus values after PEI has eliminated the callframe setup/destroy pseudo
467 // instructions, specify explicitly if you need it to be correct.
468 MachineFrameInfo &MFI = MF.getFrameInfo();
471}
472
473bool AArch64Subtarget::useAA() const { return UseAA; }
474
477 assert(hasSVEorSME() && "Expected SVE to be available");
478 return hasSVEorSME();
479 }
480 return false;
481}
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the MachineLegalizer class for AArch64.
This file declares the targeting of the RegisterBankInfo class for AArch64.
static cl::opt< bool > UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of " "an address is ignored"), cl::init(false), cl::Hidden)
static cl::opt< bool > UseNonLazyBind("aarch64-enable-nonlazybind", cl::desc("Call nonlazybind functions via direct GOT load"), cl::init(false), cl::Hidden)
static cl::opt< bool > EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " "converter pass"), cl::init(true), cl::Hidden)
static cl::opt< bool > ForceStreamingCompatibleSVE("force-streaming-compatible-sve", cl::init(false), cl::Hidden)
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static cl::list< std::string > ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical " "registers, so they can't be used by register allocator. " "Should only be used for testing register allocator."), cl::CommaSeparated, cl::Hidden)
static cl::opt< unsigned > OverrideVectorInsertExtractBaseCost("aarch64-insert-extract-base-cost", cl::desc("Base cost of vector insert/extract element"), cl::Hidden)
#define F(x, y, z)
Definition: MD5.cpp:55
unsigned const TargetRegisterInfo * TRI
const char LLVMTargetMachineRef TM
return InstrInfo
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This class provides the information for the target register banks.
This class provides the information for the target register banks.
const CallLowering * getCallLowering() const override
const AArch64RegisterInfo * getRegisterInfo() const override
bool forceStreamingCompatibleSVE() const
std::unique_ptr< InstructionSelector > InstSelector
ARMProcFamilyEnum ARMProcFamily
ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
std::unique_ptr< RegisterBankInfo > RegBankInfo
bool useSmallAddressing() const
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
bool enableEarlyIfConversion() const override
const InlineAsmLowering * getInlineAsmLowering() const override
unsigned getVectorInsertExtractBaseCost() const
std::unique_ptr< CallLowering > CallLoweringInfo
GlobalISel related APIs.
unsigned classifyGlobalFunctionReference(const GlobalValue *GV, const TargetMachine &TM) const
bool useAA() const override
const AArch64TargetLowering * getTargetLowering() const override
bool supportsAddressTopByteIgnored() const
CPU has TBI (top byte of addresses is ignored during HW address translation) and OS enables it.
const Triple & getTargetTriple() const
void mirFileLoaded(MachineFunction &MF) const override
Triple TargetTriple
TargetTriple - What processor and OS we're targeting.
InstructionSelector * getInstructionSelector() const override
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
ParseSubtargetFeatures - Parses features string setting specified subtarget options.
AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS, const TargetMachine &TM, bool LittleEndian, unsigned MinSVEVectorSizeInBitsOverride=0, unsigned MaxSVEVectorSizeInBitsOverride=0, bool StreamingSVEModeDisabled=true)
This constructor initializes the data members to match that of the specified triple.
const LegalizerInfo * getLegalizerInfo() const override
std::unique_ptr< PBQPRAConstraint > getCustomPBQPConstraints() const override
const RegisterBankInfo * getRegBankInfo() const override
std::unique_ptr< InlineAsmLowering > InlineAsmLoweringInfo
BitVector & set()
Definition: BitVector.h:351
bool isTagged() const
Definition: GlobalValue.h:360
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:524
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:274
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
bool hasInternalLinkage() const
Definition: GlobalValue.h:521
Type * getValueType() const
Definition: GlobalValue.h:292
Provides the logic to select generic machine instructions.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void computeMaxCallFrameSize(const MachineFunction &MF)
Computes the maximum size of a callframe and the AdjustsStack property.
bool isMaxCallFrameSizeComputed() const
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Holds all the information related to register banks.
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:256
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:34
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool isDriverKit() const
Is this an Apple DriverKit triple.
Definition: Triple.h:514
bool isiOS() const
Is this an iOS triple.
Definition: Triple.h:495
VersionTuple getiOSVersion() const
Parse the version number as with getOSVersion.
Definition: Triple.cpp:1275
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition: Type.h:249
Represents a version number in the form major[.minor[.subminor[.build]]].
Definition: VersionTuple.h:31
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_DLLIMPORTAUX
MO_DLLIMPORTAUX - Symbol refers to "auxiliary" import stub.
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
bool isX18ReservedByDefault(const Triple &TT)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
@ CommaSeparated
Definition: CommandLine.h:164
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.