1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that ARM uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMISelLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMCallingConv.h"
18 #include "ARMConstantPoolValue.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMPerfectShuffle.h"
21 #include "ARMRegisterInfo.h"
22 #include "ARMSelectionDAGInfo.h"
23 #include "ARMSubtarget.h"
24 #include "ARMTargetTransformInfo.h"
27 #include "Utils/ARMBaseInfo.h"
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/BitVector.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallVector.h"
36 #include "llvm/ADT/Statistic.h"
37 #include "llvm/ADT/StringExtras.h"
38 #include "llvm/ADT/StringRef.h"
39 #include "llvm/ADT/StringSwitch.h"
40 #include "llvm/ADT/Triple.h"
41 #include "llvm/ADT/Twine.h"
66 #include "llvm/IR/Attributes.h"
67 #include "llvm/IR/CallingConv.h"
68 #include "llvm/IR/Constant.h"
69 #include "llvm/IR/Constants.h"
70 #include "llvm/IR/DataLayout.h"
71 #include "llvm/IR/DebugLoc.h"
72 #include "llvm/IR/DerivedTypes.h"
73 #include "llvm/IR/Function.h"
74 #include "llvm/IR/GlobalAlias.h"
75 #include "llvm/IR/GlobalValue.h"
76 #include "llvm/IR/GlobalVariable.h"
77 #include "llvm/IR/IRBuilder.h"
78 #include "llvm/IR/InlineAsm.h"
79 #include "llvm/IR/Instruction.h"
80 #include "llvm/IR/Instructions.h"
81 #include "llvm/IR/IntrinsicInst.h"
82 #include "llvm/IR/Intrinsics.h"
83 #include "llvm/IR/IntrinsicsARM.h"
84 #include "llvm/IR/Module.h"
85 #include "llvm/IR/PatternMatch.h"
86 #include "llvm/IR/Type.h"
87 #include "llvm/IR/User.h"
88 #include "llvm/IR/Value.h"
89 #include "llvm/MC/MCInstrDesc.h"
91 #include "llvm/MC/MCRegisterInfo.h"
92 #include "llvm/MC/MCSchedule.h"
95 #include "llvm/Support/Casting.h"
96 #include "llvm/Support/CodeGen.h"
98 #include "llvm/Support/Compiler.h"
99 #include "llvm/Support/Debug.h"
101 #include "llvm/Support/KnownBits.h"
103 #include "llvm/Support/MathExtras.h"
107 #include <algorithm>
108 #include <cassert>
109 #include <cstdint>
110 #include <cstdlib>
111 #include <iterator>
112 #include <limits>
113 #include <string>
114 #include <tuple>
115 #include <utility>
116 #include <vector>
117 
118 using namespace llvm;
119 using namespace llvm::PatternMatch;
120 
121 #define DEBUG_TYPE "arm-isel"
122 
123 STATISTIC(NumTailCalls, "Number of tail calls");
124 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
125 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
126 STATISTIC(NumConstpoolPromoted,
127  "Number of constants with their storage promoted into constant pools");
128 
129 static cl::opt<bool>
130 ARMInterworking("arm-interworking", cl::Hidden,
131  cl::desc("Enable / disable ARM interworking (for debugging only)"),
132  cl::init(true));
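// The options in this block are ordinary cl::opt flags, so (for example, and
// assuming the usual llc driver) they can be toggled on the command line with
// something like "llc -arm-interworking=false foo.ll"; the input file here is
// just a placeholder.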
133 
135  "arm-promote-constant", cl::Hidden,
136  cl::desc("Enable / disable promotion of unnamed_addr constants into "
137  "constant pools"),
138  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
140  "arm-promote-constant-max-size", cl::Hidden,
141  cl::desc("Maximum size of constant to promote into a constant pool"),
142  cl::init(64));
144  "arm-promote-constant-max-total", cl::Hidden,
145  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
146  cl::init(128));
147 
149 MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
150  cl::desc("Maximum interleave factor for MVE VLDn to generate."),
151  cl::init(2));
152 
153 // The APCS parameter registers.
154 static const MCPhysReg GPRArgRegs[] = {
155  ARM::R0, ARM::R1, ARM::R2, ARM::R3
156 };
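// Under the APCS/AAPCS these four registers carry the first four 32-bit
// argument slots; for example, in f(int a, int b, int c, int d, int e) the
// values a..d arrive in R0..R3 and e is passed on the stack.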
157 
158 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
159  if (VT != PromotedLdStVT) {
160  setOperationAction(ISD::LOAD, VT, Promote);
161  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
162 
163  setOperationAction(ISD::STORE, VT, Promote);
164  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
165  }
166 
167  MVT ElemTy = VT.getVectorElementType();
168  if (ElemTy != MVT::f64)
169  setOperationAction(ISD::SETCC, VT, Custom);
170  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
171  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
172  if (ElemTy == MVT::i32) {
173  setOperationAction(ISD::SINT_TO_FP, VT, Custom);
174  setOperationAction(ISD::UINT_TO_FP, VT, Custom);
175  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
176  setOperationAction(ISD::FP_TO_UINT, VT, Custom);
177  } else {
178  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
179  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
180  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
181  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
182  }
183  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
184  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
185  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
186  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
187  setOperationAction(ISD::SELECT, VT, Expand);
188  setOperationAction(ISD::SELECT_CC, VT, Expand);
189  setOperationAction(ISD::VSELECT, VT, Expand);
190  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
191  if (VT.isInteger()) {
192  setOperationAction(ISD::SHL, VT, Custom);
193  setOperationAction(ISD::SRA, VT, Custom);
194  setOperationAction(ISD::SRL, VT, Custom);
195  }
196 
197  // Neon does not support vector divide/remainder operations.
198  setOperationAction(ISD::SDIV, VT, Expand);
199  setOperationAction(ISD::UDIV, VT, Expand);
200  setOperationAction(ISD::FDIV, VT, Expand);
201  setOperationAction(ISD::SREM, VT, Expand);
202  setOperationAction(ISD::UREM, VT, Expand);
203  setOperationAction(ISD::FREM, VT, Expand);
204  setOperationAction(ISD::SDIVREM, VT, Expand);
205  setOperationAction(ISD::UDIVREM, VT, Expand);
206 
207  if (!VT.isFloatingPoint() &&
208  VT != MVT::v2i64 && VT != MVT::v1i64)
209  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
210  setOperationAction(Opcode, VT, Legal);
211  if (!VT.isFloatingPoint())
212  for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
213  setOperationAction(Opcode, VT, Legal);
214 }
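// A rough illustration of the actions chosen above (illustrative, not
// exhaustive): for a 64-bit NEON type such as v8i8, registered with
// PromotedLdStVT == f64,
//   load/store v8i8 -> Promote: performed as an f64 access plus a bitcast
//   add v8i8        -> left Legal: selected directly to a NEON instruction
//   shl v8i8        -> Custom: handled by ARM-specific shift lowering
//   sdiv v8i8       -> Expand: scalarized / turned into library code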
215 
216 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
217  addRegisterClass(VT, &ARM::DPRRegClass);
218  addTypeForNEON(VT, MVT::f64);
219 }
220 
221 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
222  addRegisterClass(VT, &ARM::DPairRegClass);
223  addTypeForNEON(VT, MVT::v2f64);
224 }
225 
226 void ARMTargetLowering::setAllExpand(MVT VT) {
227  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
228  setOperationAction(Opc, VT, Expand);
229 
230  // We support these really simple operations even on types where all
231  // the actual arithmetic has to be broken down into simpler
232  // operations or turned into library calls.
233  setOperationAction(ISD::BITCAST, VT, Legal);
234  setOperationAction(ISD::LOAD, VT, Legal);
235  setOperationAction(ISD::STORE, VT, Legal);
236  setOperationAction(ISD::UNDEF, VT, Legal);
237 }
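// Note that the loop above deliberately marks every opcode Expand first and
// then re-legalizes only the simple moves; this is how types such as f32/f64
// without the corresponding FP hardware, or v2i64 under MVE, stay
// register-allocatable while all real arithmetic on them is expanded or
// turned into library calls.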
238 
239 void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
240  LegalizeAction Action) {
241  setLoadExtAction(ISD::EXTLOAD, From, To, Action);
242  setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
243  setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
244 }
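// For example, addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal) marks the
// any-extending, zero-extending and sign-extending loads from a v4i8 memory
// type to a v4i32 result as legal in a single call.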
245 
246 void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
247  const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
248 
249  for (auto VT : IntTypes) {
250  addRegisterClass(VT, &ARM::MQPRRegClass);
251  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
252  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
253  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
254  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
255  setOperationAction(ISD::SHL, VT, Custom);
256  setOperationAction(ISD::SRA, VT, Custom);
257  setOperationAction(ISD::SRL, VT, Custom);
258  setOperationAction(ISD::SMIN, VT, Legal);
259  setOperationAction(ISD::SMAX, VT, Legal);
260  setOperationAction(ISD::UMIN, VT, Legal);
261  setOperationAction(ISD::UMAX, VT, Legal);
262  setOperationAction(ISD::ABS, VT, Legal);
263  setOperationAction(ISD::SETCC, VT, Custom);
264  setOperationAction(ISD::MLOAD, VT, Custom);
265  setOperationAction(ISD::MSTORE, VT, Legal);
266  setOperationAction(ISD::CTLZ, VT, Legal);
267  setOperationAction(ISD::CTTZ, VT, Custom);
268  setOperationAction(ISD::BITREVERSE, VT, Legal);
269  setOperationAction(ISD::BSWAP, VT, Legal);
270  setOperationAction(ISD::SADDSAT, VT, Legal);
271  setOperationAction(ISD::UADDSAT, VT, Legal);
272  setOperationAction(ISD::SSUBSAT, VT, Legal);
273  setOperationAction(ISD::USUBSAT, VT, Legal);
274  setOperationAction(ISD::ABDS, VT, Legal);
275  setOperationAction(ISD::ABDU, VT, Legal);
276 
277  // No native support for these.
278  setOperationAction(ISD::UDIV, VT, Expand);
279  setOperationAction(ISD::SDIV, VT, Expand);
280  setOperationAction(ISD::UREM, VT, Expand);
281  setOperationAction(ISD::SREM, VT, Expand);
282  setOperationAction(ISD::UDIVREM, VT, Expand);
283  setOperationAction(ISD::SDIVREM, VT, Expand);
284  setOperationAction(ISD::CTPOP, VT, Expand);
285  setOperationAction(ISD::SELECT, VT, Expand);
286  setOperationAction(ISD::SELECT_CC, VT, Expand);
287 
288  // Vector reductions
289  setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
290  setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
291  setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
292  setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
293  setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
294  setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
295  setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
296  setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
297  setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
298 
299  if (!HasMVEFP) {
300  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
301  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
302  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
303  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
304  } else {
305  setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
306  setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
307  }
308 
309  // Pre and Post inc are supported on loads and stores
310  for (unsigned im = (unsigned)ISD::PRE_INC;
311  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
312  setIndexedLoadAction(im, VT, Legal);
313  setIndexedStoreAction(im, VT, Legal);
314  setIndexedMaskedLoadAction(im, VT, Legal);
315  setIndexedMaskedStoreAction(im, VT, Legal);
316  }
317  }
318 
319  const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
320  for (auto VT : FloatTypes) {
321  addRegisterClass(VT, &ARM::MQPRRegClass);
322  if (!HasMVEFP)
323  setAllExpand(VT);
324 
325  // These are legal or custom whether or not we have MVE.fp.
326  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
327  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
328  setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
329  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
330  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
331  setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
332  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
333  setOperationAction(ISD::SETCC, VT, Custom);
334  setOperationAction(ISD::MLOAD, VT, Custom);
335  setOperationAction(ISD::MSTORE, VT, Legal);
336  setOperationAction(ISD::SELECT, VT, Expand);
337  setOperationAction(ISD::SELECT_CC, VT, Expand);
338 
339  // Pre and Post inc are supported on loads and stores
340  for (unsigned im = (unsigned)ISD::PRE_INC;
341  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
342  setIndexedLoadAction(im, VT, Legal);
343  setIndexedStoreAction(im, VT, Legal);
344  setIndexedMaskedLoadAction(im, VT, Legal);
345  setIndexedMaskedStoreAction(im, VT, Legal);
346  }
347 
348  if (HasMVEFP) {
349  setOperationAction(ISD::FMINNUM, VT, Legal);
350  setOperationAction(ISD::FMAXNUM, VT, Legal);
351  setOperationAction(ISD::FROUND, VT, Legal);
352  setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
353  setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
354  setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
355  setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
356 
357  // No native support for these.
358  setOperationAction(ISD::FDIV, VT, Expand);
359  setOperationAction(ISD::FREM, VT, Expand);
360  setOperationAction(ISD::FSQRT, VT, Expand);
361  setOperationAction(ISD::FSIN, VT, Expand);
362  setOperationAction(ISD::FCOS, VT, Expand);
363  setOperationAction(ISD::FPOW, VT, Expand);
364  setOperationAction(ISD::FLOG, VT, Expand);
365  setOperationAction(ISD::FLOG2, VT, Expand);
366  setOperationAction(ISD::FLOG10, VT, Expand);
367  setOperationAction(ISD::FEXP, VT, Expand);
368  setOperationAction(ISD::FEXP2, VT, Expand);
369  setOperationAction(ISD::FNEARBYINT, VT, Expand);
370  }
371  }
372 
373  // Custom-expand smaller-than-legal vector reductions to prevent false zero
374  // items from being added.
375  setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);
376  setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);
377  setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);
378  setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);
379  setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
380  setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);
381  setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
382  setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);
383 
384  // We 'support' these types up to bitcast/load/store level, regardless of
385  // MVE integer-only / float support. Only FP data processing on the FP
386  // vector types is inhibited at the integer-only level.
387  const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
388  for (auto VT : LongTypes) {
389  addRegisterClass(VT, &ARM::MQPRRegClass);
390  setAllExpand(VT);
391  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
392  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
393  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
394  setOperationAction(ISD::VSELECT, VT, Legal);
395  }
396  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
397 
398  // We can do bitwise operations on v2i64 vectors
399  setOperationAction(ISD::AND, MVT::v2i64, Legal);
400  setOperationAction(ISD::OR, MVT::v2i64, Legal);
401  setOperationAction(ISD::XOR, MVT::v2i64, Legal);
402 
403  // It is legal to extload from v8i8 to v8i16, and from v4i8 or v4i16 to v4i32.
404  addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
405  addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
406  addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
407 
408  // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
409  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
410  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
411  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
412  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
413  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
414 
415  // Some truncating stores are legal too.
416  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
417  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
418  setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
419 
420  // Pre and Post inc on these are legal, given the correct extends
421  for (unsigned im = (unsigned)ISD::PRE_INC;
422  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
423  for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
424  setIndexedLoadAction(im, VT, Legal);
425  setIndexedStoreAction(im, VT, Legal);
426  setIndexedMaskedLoadAction(im, VT, Legal);
427  setIndexedMaskedStoreAction(im, VT, Legal);
428  }
429  }
430 
431  // Predicate types
432  const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
433  for (auto VT : pTypes) {
434  addRegisterClass(VT, &ARM::VCCRRegClass);
435  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
436  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
437  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
438  setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
439  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
440  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
441  setOperationAction(ISD::SETCC, VT, Custom);
442  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
443  setOperationAction(ISD::LOAD, VT, Custom);
444  setOperationAction(ISD::STORE, VT, Custom);
445  setOperationAction(ISD::TRUNCATE, VT, Custom);
446  setOperationAction(ISD::VSELECT, VT, Expand);
447  setOperationAction(ISD::SELECT, VT, Expand);
448  }
449  setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
450  setOperationAction(ISD::TRUNCATE, MVT::v2i1, Expand);
451  setOperationAction(ISD::AND, MVT::v2i1, Expand);
452  setOperationAction(ISD::OR, MVT::v2i1, Expand);
453  setOperationAction(ISD::XOR, MVT::v2i1, Expand);
454  setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Expand);
455  setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Expand);
456  setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Expand);
457  setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Expand);
458 
459  setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
460  setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
461  setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
462  setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
463  setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
464  setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
465  setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
466  setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
467 }
468 
469 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
470  const ARMSubtarget &STI)
471  : TargetLowering(TM), Subtarget(&STI) {
472  RegInfo = Subtarget->getRegisterInfo();
473  Itins = Subtarget->getInstrItineraryData();
474 
477 
478  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
479  !Subtarget->isTargetWatchOS()) {
480  bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
481  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
482  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
483  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
484  : CallingConv::ARM_AAPCS);
485  }
486 
487  if (Subtarget->isTargetMachO()) {
488  // Uses VFP for Thumb libfuncs if available.
489  if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
490  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
491  static const struct {
492  const RTLIB::Libcall Op;
493  const char * const Name;
494  const ISD::CondCode Cond;
495  } LibraryCalls[] = {
496  // Single-precision floating-point arithmetic.
497  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
498  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
499  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
500  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
501 
502  // Double-precision floating-point arithmetic.
503  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
504  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
505  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
506  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
507 
508  // Single-precision comparisons.
509  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
510  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
511  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
512  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
513  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
514  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
515  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
516 
517  // Double-precision comparisons.
518  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
519  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
520  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
521  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
522  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
523  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
524  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
525 
526  // Floating-point to integer conversions.
527  // i64 conversions are done via library routines even when generating VFP
528  // instructions, so use the same ones.
529  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
530  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
531  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
532  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
533 
534  // Conversions between floating types.
535  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
536  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
537 
538  // Integer to floating-point conversions.
539  // i64 conversions are done via library routines even when generating VFP
540  // instructions, so use the same ones.
541  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
542  // e.g., __floatunsidf vs. __floatunssidfvfp.
543  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
544  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
545  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
546  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
547  };
548 
549  for (const auto &LC : LibraryCalls) {
550  setLibcallName(LC.Op, LC.Name);
551  if (LC.Cond != ISD::SETCC_INVALID)
552  setCmpLibcallCC(LC.Op, LC.Cond);
553  }
554  }
555  }
556 
557  // These libcalls are not available in 32-bit.
558  setLibcallName(RTLIB::SHL_I128, nullptr);
559  setLibcallName(RTLIB::SRL_I128, nullptr);
560  setLibcallName(RTLIB::SRA_I128, nullptr);
561  setLibcallName(RTLIB::MUL_I128, nullptr);
562  setLibcallName(RTLIB::MULO_I64, nullptr);
563  setLibcallName(RTLIB::MULO_I128, nullptr);
564 
565  // RTLIB
566  if (Subtarget->isAAPCS_ABI() &&
567  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
568  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
569  static const struct {
570  const RTLIB::Libcall Op;
571  const char * const Name;
572  const CallingConv::ID CC;
573  const ISD::CondCode Cond;
574  } LibraryCalls[] = {
575  // Double-precision floating-point arithmetic helper functions
576  // RTABI chapter 4.1.2, Table 2
577  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
578  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
579  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
580  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
581 
582  // Double-precision floating-point comparison helper functions
583  // RTABI chapter 4.1.2, Table 3
584  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
585  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
586  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
587  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
588  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
589  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
590  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
591 
592  // Single-precision floating-point arithmetic helper functions
593  // RTABI chapter 4.1.2, Table 4
594  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
595  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
596  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
597  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
598 
599  // Single-precision floating-point comparison helper functions
600  // RTABI chapter 4.1.2, Table 5
601  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
602  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
603  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
604  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
605  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
606  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
607  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
608 
609  // Floating-point to integer conversions.
610  // RTABI chapter 4.1.2, Table 6
611  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
612  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
613  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
614  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
615  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
616  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
617  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
618  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
619 
620  // Conversions between floating types.
621  // RTABI chapter 4.1.2, Table 7
622  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
623  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
624  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
625 
626  // Integer to floating-point conversions.
627  // RTABI chapter 4.1.2, Table 8
628  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
629  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
630  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
631  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
634  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
635  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
636 
637  // Long long helper functions
638  // RTABI chapter 4.2, Table 9
639  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
640  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
641  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
642  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
643 
644  // Integer division functions
645  // RTABI chapter 4.3.1
646  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
647  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
648  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
649  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
650  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
651  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
652  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
653  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
654  };
655 
656  for (const auto &LC : LibraryCalls) {
657  setLibcallName(LC.Op, LC.Name);
658  setLibcallCallingConv(LC.Op, LC.CC);
659  if (LC.Cond != ISD::SETCC_INVALID)
660  setCmpLibcallCC(LC.Op, LC.Cond);
661  }
662 
663  // EABI dependent RTLIB
664  if (TM.Options.EABIVersion == EABI::EABI4 ||
665  TM.Options.EABIVersion == EABI::EABI5) {
666  static const struct {
667  const RTLIB::Libcall Op;
668  const char *const Name;
669  const CallingConv::ID CC;
670  const ISD::CondCode Cond;
671  } MemOpsLibraryCalls[] = {
672  // Memory operations
673  // RTABI chapter 4.3.4
675  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
676  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
677  };
678 
679  for (const auto &LC : MemOpsLibraryCalls) {
680  setLibcallName(LC.Op, LC.Name);
681  setLibcallCallingConv(LC.Op, LC.CC);
682  if (LC.Cond != ISD::SETCC_INVALID)
683  setCmpLibcallCC(LC.Op, LC.Cond);
684  }
685  }
686  }
687 
688  if (Subtarget->isTargetWindows()) {
689  static const struct {
690  const RTLIB::Libcall Op;
691  const char * const Name;
692  const CallingConv::ID CC;
693  } LibraryCalls[] = {
694  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
695  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
696  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
697  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
698  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
699  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
700  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
701  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
702  };
703 
704  for (const auto &LC : LibraryCalls) {
705  setLibcallName(LC.Op, LC.Name);
706  setLibcallCallingConv(LC.Op, LC.CC);
707  }
708  }
709 
710  // Use divmod compiler-rt calls for iOS 5.0 and later.
711  if (Subtarget->isTargetMachO() &&
712  !(Subtarget->isTargetIOS() &&
713  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
714  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
715  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
716  }
717 
718  // The half <-> float conversion functions are always soft-float on
719  // non-watchOS platforms, but are needed for some targets that use a
720  // hard-float calling convention by default.
721  if (!Subtarget->isTargetWatchABI()) {
722  if (Subtarget->isAAPCS_ABI()) {
723  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
724  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
725  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
726  } else {
727  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
728  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
729  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
730  }
731  }
732 
733  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
734  // a __gnu_ prefix (which is the default).
735  if (Subtarget->isTargetAEABI()) {
736  static const struct {
737  const RTLIB::Libcall Op;
738  const char * const Name;
739  const CallingConv::ID CC;
740  } LibraryCalls[] = {
741  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
742  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
743  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
744  };
745 
746  for (const auto &LC : LibraryCalls) {
747  setLibcallName(LC.Op, LC.Name);
748  setLibcallCallingConv(LC.Op, LC.CC);
749  }
750  }
751 
752  if (Subtarget->isThumb1Only())
753  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
754  else
755  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
756 
757  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
758  Subtarget->hasFPRegs()) {
759  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
760  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
761 
766 
767  if (!Subtarget->hasVFP2Base())
768  setAllExpand(MVT::f32);
769  if (!Subtarget->hasFP64())
770  setAllExpand(MVT::f64);
771  }
772 
773  if (Subtarget->hasFullFP16()) {
774  addRegisterClass(MVT::f16, &ARM::HPRRegClass);
777 
780  }
781 
782  if (Subtarget->hasBF16()) {
783  addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
784  setAllExpand(MVT::bf16);
785  if (!Subtarget->hasFullFP16())
787  }
788 
789  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
790  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
791  setTruncStoreAction(VT, InnerVT, Expand);
792  addAllExtLoads(VT, InnerVT, Expand);
793  }
794 
797 
799  }
800 
803 
806 
807  if (Subtarget->hasMVEIntegerOps())
808  addMVEVectorTypes(Subtarget->hasMVEFloatOps());
809 
810  // Combine low-overhead loop intrinsics so that we can lower i1 types.
811  if (Subtarget->hasLOB()) {
814  }
815 
816  if (Subtarget->hasNEON()) {
817  addDRTypeForNEON(MVT::v2f32);
818  addDRTypeForNEON(MVT::v8i8);
819  addDRTypeForNEON(MVT::v4i16);
820  addDRTypeForNEON(MVT::v2i32);
821  addDRTypeForNEON(MVT::v1i64);
822 
823  addQRTypeForNEON(MVT::v4f32);
824  addQRTypeForNEON(MVT::v2f64);
825  addQRTypeForNEON(MVT::v16i8);
826  addQRTypeForNEON(MVT::v8i16);
827  addQRTypeForNEON(MVT::v4i32);
828  addQRTypeForNEON(MVT::v2i64);
829 
830  if (Subtarget->hasFullFP16()) {
831  addQRTypeForNEON(MVT::v8f16);
832  addDRTypeForNEON(MVT::v4f16);
833  }
834 
835  if (Subtarget->hasBF16()) {
836  addQRTypeForNEON(MVT::v8bf16);
837  addDRTypeForNEON(MVT::v4bf16);
838  }
839  }
840 
841  if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
842  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
843  // none of Neon, MVE or VFP supports any arithmetic operations on it.
847  // FIXME: Code duplication: FDIV and FREM are expanded always, see
848  // ARMTargetLowering::addTypeForNEON method for details.
851  // FIXME: Create unittest.
852  // In other words, find a way to handle "copysign" when it appears in the
853  // DAG with vector operands.
855  // FIXME: Code duplication: SETCC has custom operation action, see
856  // ARMTargetLowering::addTypeForNEON method for details.
858  // FIXME: Create unittest for FNEG and for FABS.
870  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
877  }
878 
879  if (Subtarget->hasNEON()) {
880  // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
881  // supported for v4f32.
896 
897  // Mark v2f32 intrinsics.
912 
913  // Neon does not support some operations on v1i64 and v2i64 types.
915  // Custom handling for some quad-vector types to detect VMULL.
919  // Custom handling for some vector types to avoid expensive expansions
924  // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
925  // a destination type that is wider than the source, nor does
926  // it have an FP_TO_[SU]INT instruction with a narrower destination than
927  // the source.
936 
939 
940  // NEON does not have a single-instruction CTPOP for vectors with element
941  // types wider than 8 bits. However, custom lowering can leverage the
942  // v8i8/v16i8 vcnt instruction.
949 
952 
953  // NEON does not have a single-instruction CTTZ for vectors.
958 
963 
968 
973 
974  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
977  }
978 
979  // NEON only has FMA instructions as of VFP4.
980  if (!Subtarget->hasVFP4Base()) {
983  }
984 
992 
993  // It is legal to extload from v4i8 to v4i16 or v4i32.
995  MVT::v2i32}) {
1000  }
1001  }
1002  }
1003 
1004  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1021  }
1022  if (Subtarget->hasMVEIntegerOps()) {
1031  }
1032  if (Subtarget->hasMVEFloatOps()) {
1034  }
1035 
1036  if (!Subtarget->hasFP64()) {
1037  // When targeting a floating-point unit with only single-precision
1038  // operations, f64 is legal for the few double-precision instructions which
1039  // are present. However, no double-precision operations other than moves,
1040  // loads and stores are provided by the hardware.
1077  }
1078 
1079  if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1082  if (Subtarget->hasFullFP16()) {
1085  }
1086  }
1087 
1088  if (!Subtarget->hasFP16()) {
1091  }
1092 
1094 
1095  // ARM does not have floating-point extending loads.
1096  for (MVT VT : MVT::fp_valuetypes()) {
1099  }
1100 
1101  // ... or truncating stores
1105 
1106  // ARM does not have an i1 sign-extending load.
1107  for (MVT VT : MVT::integer_valuetypes())
1109 
1110  // ARM supports all 4 flavors of integer indexed load / store.
1111  if (!Subtarget->isThumb1Only()) {
1112  for (unsigned im = (unsigned)ISD::PRE_INC;
1113  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1122  }
1123  } else {
1124  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1127  }
1128 
1133 
1136  if (Subtarget->hasDSP()) {
1145  }
1146  if (Subtarget->hasBaseDSP()) {
1149  }
1150 
1151  // i64 operation support.
1154  if (Subtarget->isThumb1Only()) {
1157  }
1158  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1159  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1161 
1171 
1172  // MVE lowers 64-bit shifts to lsll and lsrl,
1173  // assuming that ISD::SRL and SRA of i64 are already marked Custom.
1174  if (Subtarget->hasMVEIntegerOps())
1176 
1177  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1178  if (Subtarget->isThumb1Only()) {
1182  }
1183 
1184  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1186 
1187  // ARM does not have ROTL.
1189  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1192  }
1195  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1198  }
1199 
1200  // @llvm.readcyclecounter requires the Performance Monitors extension.
1201  // Default to the 0 expansion on unsupported platforms.
1202  // FIXME: Technically there are older ARM CPUs that have
1203  // implementation-specific ways of obtaining this information.
1204  if (Subtarget->hasPerfMon())
1206 
1207  // Only ARMv6 has BSWAP.
1208  if (!Subtarget->hasV6Ops())
1210 
1211  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1212  : Subtarget->hasDivideInARMMode();
1213  if (!hasDivide) {
1214  // These are expanded into libcalls if the CPU doesn't have a hardware divider.
1217  }
1218 
1219  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1222 
1225  }
1226 
1229 
1230  // Register based DivRem for AEABI (RTABI 4.2)
1231  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1232  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1233  Subtarget->isTargetWindows()) {
1236  HasStandaloneRem = false;
1237 
1238  if (Subtarget->isTargetWindows()) {
1239  const struct {
1240  const RTLIB::Libcall Op;
1241  const char * const Name;
1242  const CallingConv::ID CC;
1243  } LibraryCalls[] = {
1244  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1245  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1246  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1247  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1248 
1249  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1250  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1251  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1252  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1253  };
1254 
1255  for (const auto &LC : LibraryCalls) {
1256  setLibcallName(LC.Op, LC.Name);
1257  setLibcallCallingConv(LC.Op, LC.CC);
1258  }
1259  } else {
1260  const struct {
1261  const RTLIB::Libcall Op;
1262  const char * const Name;
1263  const CallingConv::ID CC;
1264  } LibraryCalls[] = {
1265  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1266  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1267  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1268  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1269 
1270  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1271  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1272  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1273  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1274  };
1275 
1276  for (const auto &LC : LibraryCalls) {
1277  setLibcallName(LC.Op, LC.Name);
1278  setLibcallCallingConv(LC.Op, LC.CC);
1279  }
1280  }
1281 
1286  } else {
1289  }
1290 
1291  if (Subtarget->getTargetTriple().isOSMSVCRT()) {
1292  // MSVCRT doesn't have powi; fall back to pow
1293  setLibcallName(RTLIB::POWI_F32, nullptr);
1294  setLibcallName(RTLIB::POWI_F64, nullptr);
1295  }
1296 
1301 
1304 
1305  // Use the default implementation.
1312 
1313  if (Subtarget->isTargetWindows())
1315  else
1317 
1318  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1319  // the default expansion.
1320  InsertFencesForAtomic = false;
1321  if (Subtarget->hasAnyDataBarrier() &&
1322  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1323  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1324  // to ldrex/strex loops already.
1326  if (!Subtarget->isThumb() || !Subtarget->isMClass())
1328 
1329  // On v8, we have particularly efficient implementations of atomic fences
1330  // if they can be combined with nearby atomic loads and stores.
1331  if (!Subtarget->hasAcquireRelease() ||
1332  getTargetMachine().getOptLevel() == 0) {
1333  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1334  InsertFencesForAtomic = true;
1335  }
1336  } else {
1337  // If there's anything we can use as a barrier, go through custom lowering
1338  // for ATOMIC_FENCE.
1339  // If the target has DMB in Thumb, fences can be inserted.
1340  if (Subtarget->hasDataBarrier())
1341  InsertFencesForAtomic = true;
1342 
1344  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1345 
1346  // Set them all for expansion, which will force libcalls.
1359  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1360  // Unordered/Monotonic case.
1361  if (!InsertFencesForAtomic) {
1364  }
1365  }
1366 
1368 
1369  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1370  if (!Subtarget->hasV6Ops()) {
1373  }
1375 
1376  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1377  !Subtarget->isThumb1Only()) {
1378  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1379  // iff target supports vfp2.
1383  }
1384 
1385  // We want to custom lower some of our intrinsics.
1390  if (Subtarget->useSjLjEH())
1391  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1392 
1402  if (Subtarget->hasFullFP16()) {
1406  }
1407 
1409 
1412  if (Subtarget->hasFullFP16())
1417 
1418  // We don't support sin/cos/fmod/copysign/pow
1427  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1428  !Subtarget->isThumb1Only()) {
1431  }
1434 
1435  if (!Subtarget->hasVFP4Base()) {
1438  }
1439 
1440  // Various VFP goodness
1441  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1442  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1443  if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1446  }
1447 
1448  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1449  if (!Subtarget->hasFP16()) {
1452  }
1453 
1454  // Strict floating-point comparisons need custom lowering.
1461  }
1462 
1463  // Use __sincos_stret if available.
1464  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1465  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1468  }
1469 
1470  // FP-ARMv8 implements a lot of rounding-like FP operations.
1471  if (Subtarget->hasFPARMv8Base()) {
1480  if (Subtarget->hasNEON()) {
1485  }
1486 
1487  if (Subtarget->hasFP64()) {
1496  }
1497  }
1498 
1499  // FP16 often needs to be promoted to call library functions.
1500  if (Subtarget->hasFullFP16()) {
1513 
1515  }
1516 
1517  if (Subtarget->hasNEON()) {
1518  // vmin and vmax aren't available in a scalar form, so we can use
1519  // a NEON instruction with an undef lane instead. This has a performance
1520  // penalty on some cores, so we don't do this unless we have been
1521  // asked to by the core tuning model.
1522  if (Subtarget->useNEONForSinglePrecisionFP()) {
1527  }
1532 
1533  if (Subtarget->hasFullFP16()) {
1538 
1543  }
1544  }
1545 
1546  // We have target-specific dag combine patterns for the following nodes:
1547  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1554 
1555  if (Subtarget->hasMVEIntegerOps())
1557 
1558  if (Subtarget->hasV6Ops())
1560  if (Subtarget->isThumb1Only())
1562 
1564 
1565  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1566  !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1568  else
1570 
1571  //// temporary - rewrite interface to use type
1572  MaxStoresPerMemset = 8;
1574  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1576  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1578 
1579  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1580  // are at least 4 bytes aligned.
1582 
1583  // Prefer likely predicted branches to selects on out-of-order cores.
1584  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1585 
1586  setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1587 
1588  setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1589 
1590  if (Subtarget->isThumb() || Subtarget->isThumb2())
1592 }
1593 
1595  return Subtarget->useSoftFloat();
1596 }
1597 
1598 // FIXME: It might make sense to define the representative register class as the
1599 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1600 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1601 // SPR's representative would be DPR_VFP2. This should work well if register
1602 // pressure tracking were modified such that a register use would increment the
1603 // pressure of the register class's representative and all of its super
1604 // classes' representatives transitively. We have not implemented this because
1605 // of the difficulty prior to coalescing of modeling operand register classes
1606 // due to the common occurrence of cross class copies and subregister insertions
1607 // and extractions.
1608 std::pair<const TargetRegisterClass *, uint8_t>
1610  MVT VT) const {
1611  const TargetRegisterClass *RRC = nullptr;
1612  uint8_t Cost = 1;
1613  switch (VT.SimpleTy) {
1614  default:
1616  // Use DPR as representative register class for all floating point
1617  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1618  // the cost is 1 for both f32 and f64.
1619  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1620  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1621  RRC = &ARM::DPRRegClass;
1622  // When NEON is used for SP, only half of the register file is available
1623  // because operations that define both SP and DP results will be constrained
1624  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1625  // coalescing by double-counting the SP regs. See the FIXME above.
1626  if (Subtarget->useNEONForSinglePrecisionFP())
1627  Cost = 2;
1628  break;
1629  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1630  case MVT::v4f32: case MVT::v2f64:
1631  RRC = &ARM::DPRRegClass;
1632  Cost = 2;
1633  break;
1634  case MVT::v4i64:
1635  RRC = &ARM::DPRRegClass;
1636  Cost = 4;
1637  break;
1638  case MVT::v8i64:
1639  RRC = &ARM::DPRRegClass;
1640  Cost = 8;
1641  break;
1642  }
1643  return std::make_pair(RRC, Cost);
1644 }
1645 
1646 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1647 #define MAKE_CASE(V) \
1648  case V: \
1649  return #V;
1650  switch ((ARMISD::NodeType)Opcode) {
1651  case ARMISD::FIRST_NUMBER:
1652  break;
1856 #undef MAKE_CASE
1857  }
1858  return nullptr;
1859 }
1860 
1862  EVT VT) const {
1863  if (!VT.isVector())
1864  return getPointerTy(DL);
1865 
1866  // MVE has a predicate register.
1867  if ((Subtarget->hasMVEIntegerOps() &&
1868  (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
1869  VT == MVT::v16i8)) ||
1870  (Subtarget->hasMVEFloatOps() &&
1871  (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
1874 }
1875 
1876 /// getRegClassFor - Return the register class that should be used for the
1877 /// specified value type.
1878 const TargetRegisterClass *
1879 ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1880  (void)isDivergent;
1881  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1882  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1883  // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1884  // MVE Q registers.
1885  if (Subtarget->hasNEON()) {
1886  if (VT == MVT::v4i64)
1887  return &ARM::QQPRRegClass;
1888  if (VT == MVT::v8i64)
1889  return &ARM::QQQQPRRegClass;
1890  }
1891  if (Subtarget->hasMVEIntegerOps()) {
1892  if (VT == MVT::v4i64)
1893  return &ARM::MQQPRRegClass;
1894  if (VT == MVT::v8i64)
1895  return &ARM::MQQQQPRRegClass;
1896  }
1897  return TargetLowering::getRegClassFor(VT);
1898 }
1899 
1900 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1901 // source/dest is aligned and the copy size is large enough. We therefore want
1902 // to align such objects passed to memory intrinsics.
1904  unsigned &PrefAlign) const {
1905  if (!isa<MemIntrinsic>(CI))
1906  return false;
1907  MinSize = 8;
1908  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1909  // cycle faster than 4-byte aligned LDM.
1910  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1911  return true;
1912 }
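// In effect: for a memory intrinsic such as @llvm.memcpy of at least
// MinSize (8) bytes, callers may raise the alignment of the underlying
// objects to PrefAlign, 8 on v6 and later non-M-class cores and 4 otherwise,
// so that the copy can be lowered to LDM/STM.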
1913 
1914 // Create a fast isel object.
1915 FastISel *
1917  const TargetLibraryInfo *libInfo) const {
1918  return ARM::createFastISel(funcInfo, libInfo);
1919 }
1920 
1922  unsigned NumVals = N->getNumValues();
1923  if (!NumVals)
1924  return Sched::RegPressure;
1925 
1926  for (unsigned i = 0; i != NumVals; ++i) {
1927  EVT VT = N->getValueType(i);
1928  if (VT == MVT::Glue || VT == MVT::Other)
1929  continue;
1930  if (VT.isFloatingPoint() || VT.isVector())
1931  return Sched::ILP;
1932  }
1933 
1934  if (!N->isMachineOpcode())
1935  return Sched::RegPressure;
1936 
1937  // Loads are scheduled for latency even if the instruction itinerary
1938  // is not available.
1939  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1940  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1941 
1942  if (MCID.getNumDefs() == 0)
1943  return Sched::RegPressure;
1944  if (!Itins->isEmpty() &&
1945  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1946  return Sched::ILP;
1947 
1948  return Sched::RegPressure;
1949 }
1950 
1951 //===----------------------------------------------------------------------===//
1952 // Lowering Code
1953 //===----------------------------------------------------------------------===//
1954 
1955 static bool isSRL16(const SDValue &Op) {
1956  if (Op.getOpcode() != ISD::SRL)
1957  return false;
1958  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1959  return Const->getZExtValue() == 16;
1960  return false;
1961 }
1962 
1963 static bool isSRA16(const SDValue &Op) {
1964  if (Op.getOpcode() != ISD::SRA)
1965  return false;
1966  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1967  return Const->getZExtValue() == 16;
1968  return false;
1969 }
1970 
1971 static bool isSHL16(const SDValue &Op) {
1972  if (Op.getOpcode() != ISD::SHL)
1973  return false;
1974  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1975  return Const->getZExtValue() == 16;
1976  return false;
1977 }
1978 
1979 // Check for a signed 16-bit value. We special-case SRA because it makes
1980 // things simpler when also looking for SRAs that aren't sign-extending a
1981 // smaller value. Without the check, we'd need to take extra care with
1982 // checking order for some operations.
1983 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1984  if (isSRA16(Op))
1985  return isSHL16(Op.getOperand(0));
1986  return DAG.ComputeNumSignBits(Op) == 17;
1987 }
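// For example, with i32 operands (sra (shl x, 16), 16) is accepted directly,
// and so is any node for which ComputeNumSignBits reports 17, i.e. the top
// 17 bits are all copies of the sign bit, so the value fits in a signed
// 16-bit halfword.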
1988 
1989 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1991  switch (CC) {
1992  default: llvm_unreachable("Unknown condition code!");
1993  case ISD::SETNE: return ARMCC::NE;
1994  case ISD::SETEQ: return ARMCC::EQ;
1995  case ISD::SETGT: return ARMCC::GT;
1996  case ISD::SETGE: return ARMCC::GE;
1997  case ISD::SETLT: return ARMCC::LT;
1998  case ISD::SETLE: return ARMCC::LE;
1999  case ISD::SETUGT: return ARMCC::HI;
2000  case ISD::SETUGE: return ARMCC::HS;
2001  case ISD::SETULT: return ARMCC::LO;
2002  case ISD::SETULE: return ARMCC::LS;
2003  }
2004 }
2005 
2006 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
2008  ARMCC::CondCodes &CondCode2) {
2009  CondCode2 = ARMCC::AL;
2010  switch (CC) {
2011  default: llvm_unreachable("Unknown FP condition!");
2012  case ISD::SETEQ:
2013  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
2014  case ISD::SETGT:
2015  case ISD::SETOGT: CondCode = ARMCC::GT; break;
2016  case ISD::SETGE:
2017  case ISD::SETOGE: CondCode = ARMCC::GE; break;
2018  case ISD::SETOLT: CondCode = ARMCC::MI; break;
2019  case ISD::SETOLE: CondCode = ARMCC::LS; break;
2020  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
2021  case ISD::SETO: CondCode = ARMCC::VC; break;
2022  case ISD::SETUO: CondCode = ARMCC::VS; break;
2023  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
2024  case ISD::SETUGT: CondCode = ARMCC::HI; break;
2025  case ISD::SETUGE: CondCode = ARMCC::PL; break;
2026  case ISD::SETLT:
2027  case ISD::SETULT: CondCode = ARMCC::LT; break;
2028  case ISD::SETLE:
2029  case ISD::SETULE: CondCode = ARMCC::LE; break;
2030  case ISD::SETNE:
2031  case ISD::SETUNE: CondCode = ARMCC::NE; break;
2032  }
2033 }
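// A few conditions need two ARM condition codes: e.g. SETONE becomes
// "MI or GT" and SETUEQ becomes "EQ or VS", so callers can emit a second
// predicated instruction when CondCode2 != ARMCC::AL.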
2034 
2035 //===----------------------------------------------------------------------===//
2036 // Calling Convention Implementation
2037 //===----------------------------------------------------------------------===//
2038 
2039 /// getEffectiveCallingConv - Get the effective calling convention, taking into
2040 /// account presence of floating point hardware and calling convention
2041 /// limitations, such as support for variadic functions.
2043 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
2044  bool isVarArg) const {
2045  switch (CC) {
2046  default:
2047  report_fatal_error("Unsupported calling convention");
2049  case CallingConv::ARM_APCS:
2050  case CallingConv::GHC:
2052  return CC;
2056  case CallingConv::Swift:
2059  case CallingConv::C:
2060  case CallingConv::Tail:
2061  if (!Subtarget->isAAPCS_ABI())
2062  return CallingConv::ARM_APCS;
2063  else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
2064  getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2065  !isVarArg)
2067  else
2068  return CallingConv::ARM_AAPCS;
2069  case CallingConv::Fast:
2071  if (!Subtarget->isAAPCS_ABI()) {
2072  if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2073  return CallingConv::Fast;
2074  return CallingConv::ARM_APCS;
2075  } else if (Subtarget->hasVFP2Base() &&
2076  !Subtarget->isThumb1Only() && !isVarArg)
2078  else
2079  return CallingConv::ARM_AAPCS;
2080  }
2081 }
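// For example, given the rules above: a plain C function on an AAPCS
// hard-float target (and a non-variadic call) uses ARM_AAPCS_VFP, the same
// function called variadically falls back to ARM_AAPCS, and on an APCS
// target both simply use ARM_APCS.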
2082 
2084  bool isVarArg) const {
2085  return CCAssignFnForNode(CC, false, isVarArg);
2086 }
2087 
2089  bool isVarArg) const {
2090  return CCAssignFnForNode(CC, true, isVarArg);
2091 }
2092 
2093 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2094 /// CallingConvention.
2095 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2096  bool Return,
2097  bool isVarArg) const {
2098  switch (getEffectiveCallingConv(CC, isVarArg)) {
2099  default:
2100  report_fatal_error("Unsupported calling convention");
2101  case CallingConv::ARM_APCS:
2102  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2104  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2106  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2107  case CallingConv::Fast:
2108  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2109  case CallingConv::GHC:
2110  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2112  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2114  return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2115  }
2116 }
2117 
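// MoveToHPR takes a half-precision value that arrives in the low bits of a
// wider location register (f32 under the hard-float ABI, i32 under the
// soft-float ABI) and rebuilds the f16/bf16 value: with +fullfp16 this is a
// single VMOVhr, otherwise a truncate-to-i16 plus bitcast.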
2118 SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2119  MVT LocVT, MVT ValVT, SDValue Val) const {
2120  Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2121  Val);
2122  if (Subtarget->hasFullFP16()) {
2123  Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2124  } else {
2125  Val = DAG.getNode(ISD::TRUNCATE, dl,
2126  MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2127  Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2128  }
2129  return Val;
2130 }
2131 
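// MoveFromHPR is the inverse operation: it widens an f16/bf16 value back into
// the f32 or i32 location form the calling convention expects, using VMOVrh
// when +fullfp16 is available and a bitcast/zero-extend pair otherwise.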
2132 SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2133  MVT LocVT, MVT ValVT,
2134  SDValue Val) const {
2135  if (Subtarget->hasFullFP16()) {
2136  Val = DAG.getNode(ARMISD::VMOVrh, dl,
2137  MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2138  } else {
2139  Val = DAG.getNode(ISD::BITCAST, dl,
2140  MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2141  Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2142  MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2143  }
2144  return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2145 }
2146 
2147 /// LowerCallResult - Lower the result values of a call into the
2148 /// appropriate copies out of appropriate physical registers.
2149 SDValue ARMTargetLowering::LowerCallResult(
2150  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2151  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2152  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2153  SDValue ThisVal) const {
2154  // Assign locations to each value returned by this call.
2155  SmallVector<CCValAssign, 16> RVLocs;
2156  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2157  *DAG.getContext());
2158  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2159 
2160  // Copy all of the result registers out of their specified physreg.
2161  for (unsigned i = 0; i != RVLocs.size(); ++i) {
2162  CCValAssign VA = RVLocs[i];
2163 
2164  // Pass 'this' value directly from the argument to return value, to avoid
2165  // reg unit interference
2166  if (i == 0 && isThisReturn) {
2167  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2168  "unexpected return calling convention register assignment");
2169  InVals.push_back(ThisVal);
2170  continue;
2171  }
2172 
2173  SDValue Val;
2174  if (VA.needsCustom() &&
2175  (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2176  // Handle f64 or half of a v2f64.
2177  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2178  InFlag);
2179  Chain = Lo.getValue(1);
2180  InFlag = Lo.getValue(2);
2181  VA = RVLocs[++i]; // skip ahead to next loc
2182  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2183  InFlag);
2184  Chain = Hi.getValue(1);
2185  InFlag = Hi.getValue(2);
2186  if (!Subtarget->isLittle())
2187  std::swap (Lo, Hi);
2188  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2189 
2190  if (VA.getLocVT() == MVT::v2f64) {
2191  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2192  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2193  DAG.getConstant(0, dl, MVT::i32));
2194 
2195  VA = RVLocs[++i]; // skip ahead to next loc
2196  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2197  Chain = Lo.getValue(1);
2198  InFlag = Lo.getValue(2);
2199  VA = RVLocs[++i]; // skip ahead to next loc
2200  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2201  Chain = Hi.getValue(1);
2202  InFlag = Hi.getValue(2);
2203  if (!Subtarget->isLittle())
2204  std::swap (Lo, Hi);
2205  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2206  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2207  DAG.getConstant(1, dl, MVT::i32));
2208  }
2209  } else {
2210  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2211  InFlag);
2212  Chain = Val.getValue(1);
2213  InFlag = Val.getValue(2);
2214  }
2215 
2216  switch (VA.getLocInfo()) {
2217  default: llvm_unreachable("Unknown loc info!");
2218  case CCValAssign::Full: break;
2219  case CCValAssign::BCvt:
2220  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2221  break;
2222  }
2223 
2224  // f16 arguments have their size extended to 4 bytes and passed as if they
2225  // had been copied to the LSBs of a 32-bit register.
2226  // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2227  if (VA.needsCustom() &&
2228  (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2229  Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2230 
2231  InVals.push_back(Val);
2232  }
2233 
2234  return Chain;
2235 }
2236 
2237 std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2238  const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
2239  bool IsTailCall, int SPDiff) const {
2240  SDValue DstAddr;
2241  MachinePointerInfo DstInfo;
2242  int32_t Offset = VA.getLocMemOffset();
2243  MachineFunction &MF = DAG.getMachineFunction();
2244 
2245  if (IsTailCall) {
2246  Offset += SPDiff;
2247  auto PtrVT = getPointerTy(DAG.getDataLayout());
2248  int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2249  int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2250  DstAddr = DAG.getFrameIndex(FI, PtrVT);
2251  DstInfo =
2252  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
2253  } else {
2254  SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
2255  DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2256  StackPtr, PtrOff);
2257  DstInfo =
2258  MachinePointerInfo::getStack(DAG.getMachineFunction(), Offset);
2259  }
2260 
2261  return std::make_pair(DstAddr, DstInfo);
2262 }
2263 
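// PassF64ArgInRegs splits an f64 argument into two i32 halves with VMOVRRD
// (endian-aware via Subtarget->isLittle()); the first half always goes in
// VA's register, and the second goes either in NextVA's register or, if the
// convention has run out of GPRs, into the stack slot computed by
// computeAddrForCallArg.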
2264 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2265  SDValue Chain, SDValue &Arg,
2266  RegsToPassVector &RegsToPass,
2267  CCValAssign &VA, CCValAssign &NextVA,
2268  SDValue &StackPtr,
2269  SmallVectorImpl<SDValue> &MemOpChains,
2270  bool IsTailCall,
2271  int SPDiff) const {
2272  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2273  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2274  unsigned id = Subtarget->isLittle() ? 0 : 1;
2275  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2276 
2277  if (NextVA.isRegLoc())
2278  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2279  else {
2280  assert(NextVA.isMemLoc());
2281  if (!StackPtr.getNode())
2282  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2283  getPointerTy(DAG.getDataLayout()));
2284 
2285  SDValue DstAddr;
2286  MachinePointerInfo DstInfo;
2287  std::tie(DstAddr, DstInfo) =
2288  computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2289  MemOpChains.push_back(
2290  DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2291  }
2292 }
2293 
2294 static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2295  return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2296  CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
2297 }
2298 
2299 /// LowerCall - Lower a call into a callseq_start <-
2300 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
2301 /// nodes.
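/// The emitted sequence is, roughly: a CALLSEQ_START (omitted for sibling
/// calls), copies of register arguments and stores of stack arguments, the
/// call node itself (ARMISD::CALL, CALL_PRED, CALL_NOLINK, tSECALL or
/// t2CALL_BTI, or ARMISD::TC_RETURN for tail calls), and a matching
/// CALLSEQ_END.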
2302 SDValue
2303 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2304  SmallVectorImpl<SDValue> &InVals) const {
2305  SelectionDAG &DAG = CLI.DAG;
2306  SDLoc &dl = CLI.DL;
2307  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2308  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2309  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2310  SDValue Chain = CLI.Chain;
2311  SDValue Callee = CLI.Callee;
2312  bool &isTailCall = CLI.IsTailCall;
2313  CallingConv::ID CallConv = CLI.CallConv;
2314  bool doesNotRet = CLI.DoesNotReturn;
2315  bool isVarArg = CLI.IsVarArg;
2316 
2317  MachineFunction &MF = DAG.getMachineFunction();
2318  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2319  MachineFunction::CallSiteInfo CSInfo;
2320  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2321  bool isThisReturn = false;
2322  bool isCmseNSCall = false;
2323  bool isSibCall = false;
2324  bool PreferIndirect = false;
2325  bool GuardWithBTI = false;
2326 
2327  // Lower 'returns_twice' calls to a pseudo-instruction.
2328  if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2329  !Subtarget->getNoBTIAtReturnTwice())
2330  GuardWithBTI = AFI->branchTargetEnforcement();
2331 
2332  // Determine whether this is a non-secure function call.
2333  if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2334  isCmseNSCall = true;
2335 
2336  // Disable tail calls if they're not supported.
2337  if (!Subtarget->supportsTailCall())
2338  isTailCall = false;
2339 
2340  // For both the non-secure calls and the returns from a CMSE entry function,
2341  // the function needs to do some extra work after the call, or before the
2342  // return, respectively; thus it cannot end with a tail call
2343  if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2344  isTailCall = false;
2345 
2346  if (isa<GlobalAddressSDNode>(Callee)) {
2347  // If we're optimizing for minimum size and the function is called three or
2348  // more times in this block, we can improve codesize by calling indirectly
2349  // as BLXr has a 16-bit encoding.
2350  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2351  if (CLI.CB) {
2352  auto *BB = CLI.CB->getParent();
2353  PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2354  count_if(GV->users(), [&BB](const User *U) {
2355  return isa<Instruction>(U) &&
2356  cast<Instruction>(U)->getParent() == BB;
2357  }) > 2;
2358  }
2359  }
2360  if (isTailCall) {
2361  // Check if it's really possible to do a tail call.
2362  isTailCall = IsEligibleForTailCallOptimization(
2363  Callee, CallConv, isVarArg, isStructRet,
2364  MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2365  PreferIndirect);
2366 
2367  if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2368  CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2369  isSibCall = true;
2370 
2371  // We don't support GuaranteedTailCallOpt for ARM, only automatically
2372  // detected sibcalls.
2373  if (isTailCall)
2374  ++NumTailCalls;
2375  }
2376 
2377  if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2378  report_fatal_error("failed to perform tail call elimination on a call "
2379  "site marked musttail");
2380  // Analyze operands of the call, assigning locations to each operand.
2381  SmallVector<CCValAssign, 16> ArgLocs;
2382  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2383  *DAG.getContext());
2384  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2385 
2386  // Get a count of how many bytes are to be pushed on the stack.
2387  unsigned NumBytes = CCInfo.getNextStackOffset();
2388 
2389  // SPDiff is the byte offset of the call's argument area from the callee's.
2390  // Stores to callee stack arguments will be placed in FixedStackSlots offset
2391  // by this amount for a tail call. In a sibling call it must be 0 because the
2392  // caller will deallocate the entire stack and the callee still expects its
2393  // arguments to begin at SP+0. Completely unused for non-tail calls.
2394  int SPDiff = 0;
2395 
2396  if (isTailCall && !isSibCall) {
2397  auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2398  unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2399 
2400  // Since callee will pop argument stack as a tail call, we must keep the
2401  // popped size 16-byte aligned.
2402  Align StackAlign = DAG.getDataLayout().getStackAlignment();
2403  NumBytes = alignTo(NumBytes, StackAlign);
2404 
2405  // SPDiff will be negative if this tail call requires more space than we
2406  // would automatically have in our incoming argument space. Positive if we
2407  // can actually shrink the stack.
2408  SPDiff = NumReusableBytes - NumBytes;
2409 
2410  // If this call requires more stack than we have available from
2411  // LowerFormalArguments, tell FrameLowering to reserve space for it.
2412  if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2413  AFI->setArgRegsSaveSize(-SPDiff);
2414  }
2415 
2416  if (isSibCall) {
2417  // For sibling tail calls, memory operands are available in our caller's stack.
2418  NumBytes = 0;
2419  } else {
2420  // Adjust the stack pointer for the new arguments...
2421  // These operations are automatically eliminated by the prolog/epilog pass
2422  Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2423  }
2424 
2425  SDValue StackPtr =
2426  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2427 
2428  RegsToPassVector RegsToPass;
2429  SmallVector<SDValue, 8> MemOpChains;
2430 
2431  // During a tail call, stores to the argument area must happen after all of
2432  // the function's incoming arguments have been loaded because they may alias.
2433  // This is done by folding in a TokenFactor from LowerFormalArguments, but
2434  // there's no point in doing so repeatedly so this tracks whether that's
2435  // happened yet.
2436  bool AfterFormalArgLoads = false;
2437 
2438  // Walk the register/memloc assignments, inserting copies/loads. In the case
2439  // of tail call optimization, arguments are handled later.
2440  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2441  i != e;
2442  ++i, ++realArgIdx) {
2443  CCValAssign &VA = ArgLocs[i];
2444  SDValue Arg = OutVals[realArgIdx];
2445  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2446  bool isByVal = Flags.isByVal();
2447 
2448  // Promote the value if needed.
2449  switch (VA.getLocInfo()) {
2450  default: llvm_unreachable("Unknown loc info!");
2451  case CCValAssign::Full: break;
2452  case CCValAssign::SExt:
2453  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2454  break;
2455  case CCValAssign::ZExt:
2456  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2457  break;
2458  case CCValAssign::AExt:
2459  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2460  break;
2461  case CCValAssign::BCvt:
2462  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2463  break;
2464  }
2465 
2466  if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2467  Chain = DAG.getStackArgumentTokenFactor(Chain);
2468  AfterFormalArgLoads = true;
2469  }
2470 
2471  // f16 arguments have their size extended to 4 bytes and passed as if they
2472  // had been copied to the LSBs of a 32-bit register.
2473  // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2474  if (VA.needsCustom() &&
2475  (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2476  Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2477  } else {
2478  // f16 arguments could have been extended prior to argument lowering.
2479  // Mask these arguments if this is a CMSE nonsecure call.
2480  auto ArgVT = Outs[realArgIdx].ArgVT;
2481  if (isCmseNSCall && (ArgVT == MVT::f16)) {
2482  auto LocBits = VA.getLocVT().getSizeInBits();
2483  auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2484  SDValue Mask =
2485  DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2486  Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2487  Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2488  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2489  }
2490  }
2491 
2492  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2493  if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2494  SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2495  DAG.getConstant(0, dl, MVT::i32));
2496  SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2497  DAG.getConstant(1, dl, MVT::i32));
2498 
2499  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2500  StackPtr, MemOpChains, isTailCall, SPDiff);
2501 
2502  VA = ArgLocs[++i]; // skip ahead to next loc
2503  if (VA.isRegLoc()) {
2504  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2505  StackPtr, MemOpChains, isTailCall, SPDiff);
2506  } else {
2507  assert(VA.isMemLoc());
2508  SDValue DstAddr;
2509  MachinePointerInfo DstInfo;
2510  std::tie(DstAddr, DstInfo) =
2511  computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2512  MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2513  }
2514  } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2515  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2516  StackPtr, MemOpChains, isTailCall, SPDiff);
2517  } else if (VA.isRegLoc()) {
2518  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2519  Outs[0].VT == MVT::i32) {
2520  assert(VA.getLocVT() == MVT::i32 &&
2521  "unexpected calling convention register assignment");
2522  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2523  "unexpected use of 'returned'");
2524  isThisReturn = true;
2525  }
2526  const TargetOptions &Options = DAG.getTarget().Options;
2527  if (Options.EmitCallSiteInfo)
2528  CSInfo.emplace_back(VA.getLocReg(), i);
2529  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2530  } else if (isByVal) {
2531  assert(VA.isMemLoc());
2532  unsigned offset = 0;
2533 
2534  // True if this byval aggregate will be split between registers
2535  // and memory.
2536  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2537  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2538 
2539  if (CurByValIdx < ByValArgsCount) {
2540 
2541  unsigned RegBegin, RegEnd;
2542  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2543 
2544  EVT PtrVT =
2545  DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2546  unsigned int i, j;
2547  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2548  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2549  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2550  SDValue Load =
2551  DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2552  DAG.InferPtrAlign(AddArg));
2553  MemOpChains.push_back(Load.getValue(1));
2554  RegsToPass.push_back(std::make_pair(j, Load));
2555  }
2556 
2557  // If the parameter size exceeds the register area, the "offset" value
2558  // helps us calculate the stack slot for the remaining part properly.
2559  offset = RegEnd - RegBegin;
2560 
2561  CCInfo.nextInRegsParam();
2562  }
2563 
2564  if (Flags.getByValSize() > 4*offset) {
2565  auto PtrVT = getPointerTy(DAG.getDataLayout());
2566  SDValue Dst;
2567  MachinePointerInfo DstInfo;
2568  std::tie(Dst, DstInfo) =
2569  computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2570  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2571  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2572  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2573  MVT::i32);
2574  SDValue AlignNode =
2575  DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2576 
2577  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2578  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2579  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2580  Ops));
2581  }
2582  } else {
2583  assert(VA.isMemLoc());
2584  SDValue DstAddr;
2585  MachinePointerInfo DstInfo;
2586  std::tie(DstAddr, DstInfo) =
2587  computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2588 
2589  SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2590  MemOpChains.push_back(Store);
2591  }
2592  }
2593 
2594  if (!MemOpChains.empty())
2595  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2596 
2597  // Build a sequence of copy-to-reg nodes chained together with token chain
2598  // and flag operands which copy the outgoing args into the appropriate regs.
2599  SDValue InFlag;
2600  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2601  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2602  RegsToPass[i].second, InFlag);
2603  InFlag = Chain.getValue(1);
2604  }
2605 
2606  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2607  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2608  // node so that legalize doesn't hack it.
2609  bool isDirect = false;
2610 
2611  const TargetMachine &TM = getTargetMachine();
2612  const Module *Mod = MF.getFunction().getParent();
2613  const GlobalValue *GV = nullptr;
2614  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2615  GV = G->getGlobal();
2616  bool isStub =
2617  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2618 
2619  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2620  bool isLocalARMFunc = false;
2621  auto PtrVt = getPointerTy(DAG.getDataLayout());
2622 
2623  if (Subtarget->genLongCalls()) {
2624  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2625  "long-calls codegen is not position independent!");
2626  // Handle a global address or an external symbol. If it's not one of
2627  // those, the target's already in a register, so we don't need to do
2628  // anything extra.
2629  if (isa<GlobalAddressSDNode>(Callee)) {
2630  // Create a constant pool entry for the callee address
2631  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2632  ARMConstantPoolValue *CPV =
2633  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2634 
2635  // Get the address of the callee into a register
2636  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2637  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2638  Callee = DAG.getLoad(
2639  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2640  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2641  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2642  const char *Sym = S->getSymbol();
2643 
2644  // Create a constant pool entry for the callee address
2645  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2646  ARMConstantPoolValue *CPV =
2647  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2648  ARMPCLabelIndex, 0);
2649  // Get the address of the callee into a register
2650  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2651  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2652  Callee = DAG.getLoad(
2653  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2654  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2655  }
2656  } else if (isa<GlobalAddressSDNode>(Callee)) {
2657  if (!PreferIndirect) {
2658  isDirect = true;
2659  bool isDef = GV->isStrongDefinitionForLinker();
2660 
2661  // ARM call to a local ARM function is predicable.
2662  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2663  // tBX takes a register source operand.
2664  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2665  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2666  Callee = DAG.getNode(
2667  ARMISD::WrapperPIC, dl, PtrVt,
2668  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2669  Callee = DAG.getLoad(
2670  PtrVt, dl, DAG.getEntryNode(), Callee,
2671  MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(),
2672  MachineMemOperand::MODereferenceable |
2673  MachineMemOperand::MOInvariant);
2674  } else if (Subtarget->isTargetCOFF()) {
2675  assert(Subtarget->isTargetWindows() &&
2676  "Windows is the only supported COFF target");
2677  unsigned TargetFlags = ARMII::MO_NO_FLAG;
2678  if (GV->hasDLLImportStorageClass())
2679  TargetFlags = ARMII::MO_DLLIMPORT;
2680  else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
2681  TargetFlags = ARMII::MO_COFFSTUB;
2682  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
2683  TargetFlags);
2684  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2685  Callee =
2686  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2687  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2688  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2689  } else {
2690  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2691  }
2692  }
2693  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2694  isDirect = true;
2695  // tBX takes a register source operand.
2696  const char *Sym = S->getSymbol();
2697  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2698  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2699  ARMConstantPoolValue *CPV =
2700  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2701  ARMPCLabelIndex, 4);
2702  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2703  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2704  Callee = DAG.getLoad(
2705  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2706  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2707  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2708  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2709  } else {
2710  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2711  }
2712  }
2713 
2714  if (isCmseNSCall) {
2715  assert(!isARMFunc && !isDirect &&
2716  "Cannot handle call to ARM function or direct call");
2717  if (NumBytes > 0) {
2718  DiagnosticInfoUnsupported Diag(DAG.getMachineFunction().getFunction(),
2719  "call to non-secure function would "
2720  "require passing arguments on stack",
2721  dl.getDebugLoc());
2722  DAG.getContext()->diagnose(Diag);
2723  }
2724  if (isStructRet) {
2725  DiagnosticInfoUnsupported Diag(
2726  DAG.getMachineFunction().getFunction(),
2727  "call to non-secure function would return value through pointer",
2728  dl.getDebugLoc());
2729  DAG.getContext()->diagnose(Diag);
2730  }
2731  }
2732 
2733  // FIXME: handle tail calls differently.
2734  unsigned CallOpc;
2735  if (Subtarget->isThumb()) {
2736  if (GuardWithBTI)
2737  CallOpc = ARMISD::t2CALL_BTI;
2738  else if (isCmseNSCall)
2739  CallOpc = ARMISD::tSECALL;
2740  else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2741  CallOpc = ARMISD::CALL_NOLINK;
2742  else
2743  CallOpc = ARMISD::CALL;
2744  } else {
2745  if (!isDirect && !Subtarget->hasV5TOps())
2746  CallOpc = ARMISD::CALL_NOLINK;
2747  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2748  // Emit regular call when code size is the priority
2749  !Subtarget->hasMinSize())
2750  // "mov lr, pc; b _foo" to avoid confusing the return stack predictor (RSP)
2751  CallOpc = ARMISD::CALL_NOLINK;
2752  else
2753  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2754  }
2755 
2756  // We don't usually want to end the call-sequence here because we would tidy
2757  // the frame up *after* the call; however, in the ABI-changing tail-call case
2758  // we've carefully laid out the parameters so that when sp is reset they'll be
2759  // in the correct location.
2760  if (isTailCall && !isSibCall) {
2761  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
2762  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2763  InFlag = Chain.getValue(1);
2764  }
2765 
2766  std::vector<SDValue> Ops;
2767  Ops.push_back(Chain);
2768  Ops.push_back(Callee);
2769 
2770  if (isTailCall) {
2771  Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32));
2772  }
2773 
2774  // Add argument registers to the end of the list so that they are known live
2775  // into the call.
2776  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2777  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2778  RegsToPass[i].second.getValueType()));
2779 
2780  // Add a register mask operand representing the call-preserved registers.
2781  if (!isTailCall) {
2782  const uint32_t *Mask;
2783  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2784  if (isThisReturn) {
2785  // For 'this' returns, use the R0-preserving mask if applicable
2786  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2787  if (!Mask) {
2788  // Set isThisReturn to false if the calling convention is not one that
2789  // allows 'returned' to be modeled in this way, so LowerCallResult does
2790  // not try to pass 'this' straight through
2791  isThisReturn = false;
2792  Mask = ARI->getCallPreservedMask(MF, CallConv);
2793  }
2794  } else
2795  Mask = ARI->getCallPreservedMask(MF, CallConv);
2796 
2797  assert(Mask && "Missing call preserved mask for calling convention");
2798  Ops.push_back(DAG.getRegisterMask(Mask));
2799  }
2800 
2801  if (InFlag.getNode())
2802  Ops.push_back(InFlag);
2803 
2804  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2805  if (isTailCall) {
2806  MF.getFrameInfo().setHasTailCall();
2807  SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2808  DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2809  return Ret;
2810  }
2811 
2812  // Returns a chain and a flag for retval copy to use.
2813  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2814  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2815  InFlag = Chain.getValue(1);
2816  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2817 
2818  // If we're guaranteeing tail-calls will be honoured, the callee must
2819  // pop its own argument stack on return. But this call is *not* a tail call so
2820  // we need to undo that after it returns to restore the status-quo.
2821  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
2822  uint64_t CalleePopBytes =
2823  canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;
2824 
2825  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2826  DAG.getIntPtrConstant(CalleePopBytes, dl, true),
2827  InFlag, dl);
2828  if (!Ins.empty())
2829  InFlag = Chain.getValue(1);
2830 
2831  // Handle result values, copying them out of physregs into vregs that we
2832  // return.
2833  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2834  InVals, isThisReturn,
2835  isThisReturn ? OutVals[0] : SDValue());
2836 }
2837 
2838 /// HandleByVal - Every parameter *after* a byval parameter is passed
2839 /// on the stack. Remember the next parameter register to allocate,
2840 /// and then confiscate the rest of the parameter registers to ensure
2841 /// this.
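/// As a sketch of the arithmetic below (hypothetical values): for a 12-byte
/// byval argument with 4-byte alignment when R1 is the first free GPR, R1-R3
/// are claimed for the aggregate, the in-regs range becomes [R1, R4), and
/// Size is reduced to 0 because the whole aggregate fits in registers.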
2842 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2843  Align Alignment) const {
2844  // Byval (as with any stack) slots are always at least 4 byte aligned.
2845  Alignment = std::max(Alignment, Align(4));
2846 
2847  unsigned Reg = State->AllocateReg(GPRArgRegs);
2848  if (!Reg)
2849  return;
2850 
2851  unsigned AlignInRegs = Alignment.value() / 4;
2852  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2853  for (unsigned i = 0; i < Waste; ++i)
2854  Reg = State->AllocateReg(GPRArgRegs);
2855 
2856  if (!Reg)
2857  return;
2858 
2859  unsigned Excess = 4 * (ARM::R4 - Reg);
2860 
2861  // Special case when NSAA != SP and the parameter size is greater than the
2862  // size of all remaining GPR registers. In that case we cannot split the
2863  // parameter; we must send it to the stack. We must also set the NCRN to R4,
2864  // so that all remaining registers are wasted.
2865  const unsigned NSAAOffset = State->getNextStackOffset();
2866  if (NSAAOffset != 0 && Size > Excess) {
2867  while (State->AllocateReg(GPRArgRegs))
2868  ;
2869  return;
2870  }
2871 
2872  // The first register for the byval parameter is the first register that
2873  // was not allocated before this call, i.e. "Reg".
2874  // If the parameter is small enough to be saved in the range [Reg, R4), the
2875  // end (one past the last) register is Reg + param-size-in-regs; otherwise
2876  // the parameter is split between registers and the stack, and the end
2877  // register is R4 in that case.
2878  unsigned ByValRegBegin = Reg;
2879  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2880  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2881  // Note: the first register was already allocated at the beginning of this
2882  // function; allocate the remaining registers we need.
2883  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2884  State->AllocateReg(GPRArgRegs);
2885  // A byval parameter that is split between registers and memory needs its
2886  // size truncated here.
2887  // In the case where the entire structure fits in registers, we set the
2888  // size in memory to zero.
2889  Size = std::max<int>(Size - Excess, 0);
2890 }
2891 
2892 /// MatchingStackOffset - Return true if the given stack call argument is
2893 /// already available in the same position (relatively) of the caller's
2894 /// incoming argument stack.
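/// It is used by IsEligibleForTailCallOptimization below to prove that a
/// stack-passed argument can be left where it is when a call is turned into
/// a sibling call.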
2895 static
2896 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2897  MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2898  const TargetInstrInfo *TII) {
2899  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2900  int FI = std::numeric_limits<int>::max();
2901  if (Arg.getOpcode() == ISD::CopyFromReg) {
2902  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2903  if (!Register::isVirtualRegister(VR))
2904  return false;
2905  MachineInstr *Def = MRI->getVRegDef(VR);
2906  if (!Def)
2907  return false;
2908  if (!Flags.isByVal()) {
2909  if (!TII->isLoadFromStackSlot(*Def, FI))
2910  return false;
2911  } else {
2912  return false;
2913  }
2914  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2915  if (Flags.isByVal())
2916  // ByVal argument is passed in as a pointer but it's now being
2917  // dereferenced. e.g.
2918  // define @foo(%struct.X* %A) {
2919  // tail call @bar(%struct.X* byval %A)
2920  // }
2921  return false;
2922  SDValue Ptr = Ld->getBasePtr();
2923  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2924  if (!FINode)
2925  return false;
2926  FI = FINode->getIndex();
2927  } else
2928  return false;
2929 
2930  assert(FI != std::numeric_limits<int>::max());
2931  if (!MFI.isFixedObjectIndex(FI))
2932  return false;
2933  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2934 }
2935 
2936 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2937 /// for tail call optimization. Targets which want to do tail call
2938 /// optimization should implement this function.
2939 bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2940  SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2941  bool isCalleeStructRet, bool isCallerStructRet,
2942  const SmallVectorImpl<ISD::OutputArg> &Outs,
2943  const SmallVectorImpl<SDValue> &OutVals,
2944  const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
2945  const bool isIndirect) const {
2946  MachineFunction &MF = DAG.getMachineFunction();
2947  const Function &CallerF = MF.getFunction();
2948  CallingConv::ID CallerCC = CallerF.getCallingConv();
2949 
2950  assert(Subtarget->supportsTailCall());
2951 
2952  // Indirect tail calls cannot be optimized for Thumb1 if the args
2953  // to the call take up r0-r3. The reason is that there are no legal registers
2954  // left to hold the pointer to the function to be called.
2955  // Similarly, if the function uses return address sign and authentication,
2956  // r12 is needed to hold the PAC and is not available to hold the callee
2957  // address.
2958  if (Outs.size() >= 4 &&
2959  (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
2960  if (Subtarget->isThumb1Only())
2961  return false;
2962  // Conservatively assume the function spills LR.
2963  if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true))
2964  return false;
2965  }
2966 
2967  // Look for obvious safe cases to perform tail call optimization that do not
2968  // require ABI changes. This is what gcc calls sibcall.
2969 
2970  // Exception-handling functions need a special set of instructions to indicate
2971  // a return to the hardware. Tail-calling another function would probably
2972  // break this.
2973  if (CallerF.hasFnAttribute("interrupt"))
2974  return false;
2975 
2976  if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
2977  return CalleeCC == CallerCC;
2978 
2979  // Also avoid sibcall optimization if either caller or callee uses struct
2980  // return semantics.
2981  if (isCalleeStructRet || isCallerStructRet)
2982  return false;
2983 
2984  // Externally-defined functions with weak linkage should not be
2985  // tail-called on ARM when the OS does not support dynamic
2986  // pre-emption of symbols, as the AAELF spec requires normal calls
2987  // to undefined weak functions to be replaced with a NOP or jump to the
2988  // next instruction. The behaviour of branch instructions in this
2989  // situation (as used for tail calls) is implementation-defined, so we
2990  // cannot rely on the linker replacing the tail call with a return.
2991  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2992  const GlobalValue *GV = G->getGlobal();
2993  const Triple &TT = getTargetMachine().getTargetTriple();
2994  if (GV->hasExternalWeakLinkage() &&
2995  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2996  return false;
2997  }
2998 
2999  // Check that the call results are passed in the same way.
3000  LLVMContext &C = *DAG.getContext();
3001  if (!CCState::resultsCompatible(
3002  getEffectiveCallingConv(CalleeCC, isVarArg),
3003  getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
3004  CCAssignFnForReturn(CalleeCC, isVarArg),
3005  CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
3006  return false;
3007  // The callee has to preserve all registers the caller needs to preserve.
3008  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3009  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3010  if (CalleeCC != CallerCC) {
3011  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3012  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3013  return false;
3014  }
3015 
3016  // If Caller's vararg or byval argument has been split between registers and
3017  // stack, do not perform tail call, since part of the argument is in caller's
3018  // local frame.
3019  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3020  if (AFI_Caller->getArgRegsSaveSize())
3021  return false;
3022 
3023  // If the callee takes no arguments then go on to check the results of the
3024  // call.
3025  if (!Outs.empty()) {
3026  // Check if stack adjustment is needed. For now, do not do this if any
3027  // argument is passed on the stack.
3028  SmallVector<CCValAssign, 16> ArgLocs;
3029  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3030  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3031  if (CCInfo.getNextStackOffset()) {
3032  // Check if the arguments are already laid out in the right way as
3033  // the caller's fixed stack objects.
3034  MachineFrameInfo &MFI = MF.getFrameInfo();
3035  const MachineRegisterInfo *MRI = &MF.getRegInfo();
3036  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3037  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
3038  i != e;
3039  ++i, ++realArgIdx) {
3040  CCValAssign &VA = ArgLocs[i];
3041  EVT RegVT = VA.getLocVT();
3042  SDValue Arg = OutVals[realArgIdx];
3043  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3044  if (VA.getLocInfo() == CCValAssign::Indirect)
3045  return false;
3046  if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
3047  // f64 and vector types are split into multiple registers or
3048  // register/stack-slot combinations. The types will not match
3049  // the registers; give up on memory f64 refs until we figure
3050  // out what to do about this.
3051  if (!VA.isRegLoc())
3052  return false;
3053  if (!ArgLocs[++i].isRegLoc())
3054  return false;
3055  if (RegVT == MVT::v2f64) {
3056  if (!ArgLocs[++i].isRegLoc())
3057  return false;
3058  if (!ArgLocs[++i].isRegLoc())
3059  return false;
3060  }
3061  } else if (!VA.isRegLoc()) {
3062  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
3063  MFI, MRI, TII))
3064  return false;
3065  }
3066  }
3067  }
3068 
3069  const MachineRegisterInfo &MRI = MF.getRegInfo();
3070  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3071  return false;
3072  }
3073 
3074  return true;
3075 }
3076 
3077 bool
3078 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3079  MachineFunction &MF, bool isVarArg,
3080  const SmallVectorImpl<ISD::OutputArg> &Outs,
3081  LLVMContext &Context) const {
3082  SmallVector<CCValAssign, 16> RVLocs;
3083  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3084  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3085 }
3086 
3087 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
3088  const SDLoc &DL, SelectionDAG &DAG) {
3089  const MachineFunction &MF = DAG.getMachineFunction();
3090  const Function &F = MF.getFunction();
3091 
3092  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3093 
3094  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3095  // version of the "preferred return address". These offsets affect the return
3096  // instruction if this is a return from PL1 without hypervisor extensions.
3097  // IRQ/FIQ: +4 "subs pc, lr, #4"
3098  // SWI: 0 "subs pc, lr, #0"
3099  // ABORT: +4 "subs pc, lr, #4"
3100  // UNDEF: +4/+2 "subs pc, lr, #0"
3101  // UNDEF varies depending on whether the exception came from ARM or Thumb
3102  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
3103 
3104  int64_t LROffset;
3105  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3106  IntKind == "ABORT")
3107  LROffset = 4;
3108  else if (IntKind == "SWI" || IntKind == "UNDEF")
3109  LROffset = 0;
3110  else
3111  report_fatal_error("Unsupported interrupt attribute. If present, value "
3112  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3113 
3114  RetOps.insert(RetOps.begin() + 1,
3115  DAG.getConstant(LROffset, DL, MVT::i32, false));
3116 
3117  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
3118 }
3119 
3120 SDValue
3121 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3122  bool isVarArg,
3123  const SmallVectorImpl<ISD::OutputArg> &Outs,
3124  const SmallVectorImpl<SDValue> &OutVals,
3125  const SDLoc &dl, SelectionDAG &DAG) const {
3126  // CCValAssign - represent the assignment of the return value to a location.
3127  SmallVector<CCValAssign, 16> RVLocs;
3128 
3129  // CCState - Info about the registers and stack slots.
3130  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3131  *DAG.getContext());
3132 
3133  // Analyze outgoing return values.
3134  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3135 
3136  SDValue Flag;
3137  SmallVector<SDValue, 4> RetOps;
3138  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3139  bool isLittleEndian = Subtarget->isLittle();
3140 
3141  MachineFunction &MF = DAG.getMachineFunction();
3142  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3143  AFI->setReturnRegsCount(RVLocs.size());
3144 
3145  // Report error if cmse entry function returns structure through first ptr arg.
3146  if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3147  // Note: using an empty SDLoc(), as the first line of the function is a
3148  // better place to report than the last line.
3149  DiagnosticInfoUnsupported Diag(
3150  DAG.getMachineFunction().getFunction(),
3151  "secure entry function would return value through pointer",
3152  SDLoc().getDebugLoc());
3153  DAG.getContext()->diagnose(Diag);
3154  }
3155 
3156  // Copy the result values into the output registers.
3157  for (unsigned i = 0, realRVLocIdx = 0;
3158  i != RVLocs.size();
3159  ++i, ++realRVLocIdx) {
3160  CCValAssign &VA = RVLocs[i];
3161  assert(VA.isRegLoc() && "Can only return in registers!");
3162 
3163  SDValue Arg = OutVals[realRVLocIdx];
3164  bool ReturnF16 = false;
3165 
3166  if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3167  // Half-precision return values can be returned like this:
3168  //
3169  //   t11: f16 = fadd ...
3170  // t12: i16 = bitcast t11
3171  // t13: i32 = zero_extend t12
3172  // t14: f32 = bitcast t13 <~~~~~~~ Arg
3173  //
3174  // to avoid code generation for bitcasts, we simply set Arg to the node
3175  // that produces the f16 value, t11 in this case.
3176  //
3177  if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3178  SDValue ZE = Arg.getOperand(0);
3179  if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3180  SDValue BC = ZE.getOperand(0);
3181  if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3182  Arg = BC.getOperand(0);
3183  ReturnF16 = true;
3184  }
3185  }
3186  }
3187  }
3188 
3189  switch (VA.getLocInfo()) {
3190  default: llvm_unreachable("Unknown loc info!");
3191  case CCValAssign::Full: break;
3192  case CCValAssign::BCvt:
3193  if (!ReturnF16)
3194  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3195  break;
3196  }
3197 
3198  // Mask f16 arguments if this is a CMSE nonsecure entry.
3199  auto RetVT = Outs[realRVLocIdx].ArgVT;
3200  if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3201  if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3202  Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3203  } else {
3204  auto LocBits = VA.getLocVT().getSizeInBits();
3205  auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3206  SDValue Mask =
3207  DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3208  Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3209  Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3210  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3211  }
3212  }
3213 
3214  if (VA.needsCustom() &&
3215  (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3216  if (VA.getLocVT() == MVT::v2f64) {
3217  // Extract the first half and return it in two registers.
3218  SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3219  DAG.getConstant(0, dl, MVT::i32));
3220  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3221  DAG.getVTList(MVT::i32, MVT::i32), Half);
3222 
3223  Chain =
3224  DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3225  HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag);
3226  Flag = Chain.getValue(1);
3227  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3228  VA = RVLocs[++i]; // skip ahead to next loc
3229  Chain =
3230  DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3231  HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag);
3232  Flag = Chain.getValue(1);
3233  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3234  VA = RVLocs[++i]; // skip ahead to next loc
3235 
3236  // Extract the 2nd half and fall through to handle it as an f64 value.
3237  Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3238  DAG.getConstant(1, dl, MVT::i32));
3239  }
3240  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3241  // available.
3242  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3243  DAG.getVTList(MVT::i32, MVT::i32), Arg);
3244  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3245  fmrrd.getValue(isLittleEndian ? 0 : 1), Flag);
3246  Flag = Chain.getValue(1);
3247  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3248  VA = RVLocs[++i]; // skip ahead to next loc
3249  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3250  fmrrd.getValue(isLittleEndian ? 1 : 0), Flag);
3251  } else
3252  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
3253 
3254  // Guarantee that all emitted copies are
3255  // stuck together, avoiding something bad.
3256  Flag = Chain.getValue(1);
3257  RetOps.push_back(DAG.getRegister(
3258  VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3259  }
3260  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3261  const MCPhysReg *I =
3262  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3263  if (I) {
3264  for (; *I; ++I) {
3265  if (ARM::GPRRegClass.contains(*I))
3266  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3267  else if (ARM::DPRRegClass.contains(*I))
3268  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3269  else
3270  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3271  }
3272  }
3273 
3274  // Update chain and glue.
3275  RetOps[0] = Chain;
3276  if (Flag.getNode())
3277  RetOps.push_back(Flag);
3278 
3279  // CPUs which aren't M-class use a special sequence to return from
3280  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3281  // though we use "subs pc, lr, #N").
3282  //
3283  // M-class CPUs actually use a normal return sequence with a special
3284  // (hardware-provided) value in LR, so the normal code path works.
3285  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3286  !Subtarget->isMClass()) {
3287  if (Subtarget->isThumb1Only())
3288  report_fatal_error("interrupt attribute is not supported in Thumb1");
3289  return LowerInterruptReturn(RetOps, dl, DAG);
3290  }
3291 
3292  ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_FLAG :
3293  ARMISD::RET_FLAG;
3294  return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3295 }
3296 
3297 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3298  if (N->getNumValues() != 1)
3299  return false;
3300  if (!N->hasNUsesOfValue(1, 0))
3301  return false;
3302 
3303  SDValue TCChain = Chain;
3304  SDNode *Copy = *N->use_begin();
3305  if (Copy->getOpcode() == ISD::CopyToReg) {
3306  // If the copy has a glue operand, we conservatively assume it isn't safe to
3307  // perform a tail call.
3308  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3309  return false;
3310  TCChain = Copy->getOperand(0);
3311  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3312  SDNode *VMov = Copy;
3313  // f64 returned in a pair of GPRs.
3314  SmallPtrSet<SDNode*, 2> Copies;
3315  for (SDNode *U : VMov->uses()) {
3316  if (U->getOpcode() != ISD::CopyToReg)
3317  return false;
3318  Copies.insert(U);
3319  }
3320  if (Copies.size() > 2)
3321  return false;
3322 
3323  for (SDNode *U : VMov->uses()) {
3324  SDValue UseChain = U->getOperand(0);
3325  if (Copies.count(UseChain.getNode()))
3326  // Second CopyToReg
3327  Copy = U;
3328  else {
3329  // We are at the top of this chain.
3330  // If the copy has a glue operand, we conservatively assume it
3331  // isn't safe to perform a tail call.
3332  if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3333  return false;
3334  // First CopyToReg
3335  TCChain = UseChain;
3336  }
3337  }
3338  } else if (Copy->getOpcode() == ISD::BITCAST) {
3339  // f32 returned in a single GPR.
3340  if (!Copy->hasOneUse())
3341  return false;
3342  Copy = *Copy->use_begin();
3343  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3344  return false;
3345  // If the copy has a glue operand, we conservatively assume it isn't safe to
3346  // perform a tail call.
3347  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3348  return false;
3349  TCChain = Copy->getOperand(0);
3350  } else {
3351  return false;
3352  }
3353 
3354  bool HasRet = false;
3355  for (const SDNode *U : Copy->uses()) {
3356  if (U->getOpcode() != ARMISD::RET_FLAG &&
3357  U->getOpcode() != ARMISD::INTRET_FLAG)
3358  return false;
3359  HasRet = true;
3360  }
3361 
3362  if (!HasRet)
3363  return false;
3364 
3365  Chain = TCChain;
3366  return true;
3367 }
3368 
3369 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3370  if (!Subtarget->supportsTailCall())
3371  return false;
3372 
3373  if (!CI->isTailCall())
3374  return false;
3375 
3376  return true;
3377 }
3378 
3379 // We are trying to write a 64-bit value, so it needs to be split into two
3380 // 32-bit values first, and the low and high parts are passed through.
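// For example (a sketch; the register name is hypothetical), an IR call to
// llvm.write_register.i64 naming a 64-bit target register reaches this hook
// with the i64 value as operand 2; it is re-emitted as a WRITE_REGISTER node
// that takes the two i32 halves of that value.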
3381 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
3382  SDLoc DL(Op);
3383  SDValue WriteValue = Op->getOperand(2);
3384 
3385  // This function is only supposed to be called for i64 type argument.
3386  assert(WriteValue.getValueType() == MVT::i64
3387  && "LowerWRITE_REGISTER called for non-i64 type argument.");
3388 
3389  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3390  DAG.getConstant(0, DL, MVT::i32));
3391  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3392  DAG.getConstant(1, DL, MVT::i32));
3393  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3394  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3395 }
3396 
3397 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3398 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3399 // one of the above mentioned nodes. It has to be wrapped because otherwise
3400 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3401 // be used to form addressing mode. These wrapped nodes will be selected
3402 // into MOVi.
3403 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3404  SelectionDAG &DAG) const {
3405  EVT PtrVT = Op.getValueType();
3406  // FIXME there is no actual debug info here
3407  SDLoc dl(Op);
3408  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3409  SDValue Res;
3410 
3411  // When generating execute-only code Constant Pools must be promoted to the
3412  // global data section. It's a bit ugly that we can't share them across basic
3413  // blocks, but this way we guarantee that execute-only behaves correctly with
3414  // position-independent addressing modes.
3415  if (Subtarget->genExecuteOnly()) {
3416  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3417  auto T = const_cast<Type*>(CP->getType());
3418  auto C = const_cast<Constant*>(CP->getConstVal());
3419  auto M = const_cast<Module*>(DAG.getMachineFunction().
3420  getFunction().getParent());
3421  auto GV = new GlobalVariable(
3422  *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3423  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
3424  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
3425  Twine(AFI->createPICLabelUId())
3426  );
3427  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3428  dl, PtrVT);
3429  return LowerGlobalAddress(GA, DAG);
3430  }
3431 
3432  if (CP->isMachineConstantPoolEntry())
3433  Res =
3434  DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3435  else
3436  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign());
3437  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3438 }
3439 
3440 unsigned ARMTargetLowering::getJumpTableEncoding() const {
3441  return MachineJumpTableInfo::EK_Inline;
3442 }
3443 
3444 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3445  SelectionDAG &DAG) const {
3446  MachineFunction &MF = DAG.getMachineFunction();
3447  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3448  unsigned ARMPCLabelIndex = 0;
3449  SDLoc DL(Op);
3450  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3451  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3452  SDValue CPAddr;
3453  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3454  if (!IsPositionIndependent) {
3455  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3456  } else {
3457  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3458  ARMPCLabelIndex = AFI->createPICLabelUId();
3459  ARMConstantPoolValue *CPV =
3460  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3461  ARMCP::CPBlockAddress, PCAdj);
3462  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3463  }
3464  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3465  SDValue Result = DAG.getLoad(
3466  PtrVT, DL, DAG.getEntryNode(), CPAddr,
3467  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3468  if (!IsPositionIndependent)
3469  return Result;
3470  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3471  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3472 }
3473 
3474 /// Convert a TLS address reference into the correct sequence of loads
3475 /// and calls to compute the variable's address for Darwin, and return an
3476 /// SDValue containing the final node.
3477 
3478 /// Darwin only has one TLS scheme which must be capable of dealing with the
3479 /// fully general situation, in the worst case. This means:
3480 /// + "extern __thread" declaration.
3481 /// + Defined in a possibly unknown dynamic library.
3482 ///
3483 /// The general system is that each __thread variable has a [3 x i32] descriptor
3484 /// which contains information used by the runtime to calculate the address. The
3485 /// only part of this the compiler needs to know about is the first word, which
3486 /// contains a function pointer that must be called with the address of the
3487 /// entire descriptor in "r0".
3488 ///
3489 /// Since this descriptor may be in a different unit, in general access must
3490 /// proceed along the usual ARM rules. A common sequence to produce is:
3491 ///
3492 /// movw rT1, :lower16:_var$non_lazy_ptr
3493 /// movt rT1, :upper16:_var$non_lazy_ptr
3494 /// ldr r0, [rT1]
3495 /// ldr rT2, [r0]
3496 /// blx rT2
3497 /// [...address now in r0...]
3498 SDValue
3499 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3500  SelectionDAG &DAG) const {
3501  assert(Subtarget->isTargetDarwin() &&
3502  "This function expects a Darwin target");
3503  SDLoc DL(Op);
3504 
3505  // The first step is to get the address of the actual global symbol. This is where
3506  // the TLS descriptor lives.
3507  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3508 
3509  // The first entry in the descriptor is a function pointer that we must call
3510  // to obtain the address of the variable.
3511  SDValue Chain = DAG.getEntryNode();
3512  SDValue FuncTLVGet = DAG.getLoad(
3513  MVT::i32, DL, Chain, DescAddr,
3514  MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4),
3515  MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
3516  MachineMemOperand::MOInvariant);
3517  Chain = FuncTLVGet.getValue(1);
3518 
3519  MachineFunction &F = DAG.getMachineFunction();
3520  MachineFrameInfo &MFI = F.getFrameInfo();
3521  MFI.setAdjustsStack(true);
3522 
3523  // TLS calls preserve all registers except those that absolutely must be
3524  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3525  // silly).
3526  auto TRI =
3527  getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3528  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3529  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3530 
3531  // Finally, we can make the call. This is just a degenerate version of a
3532  // normal ARM call node: r0 takes the address of the descriptor, and
3533  // returns the address of the variable in this thread.
3534  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3535  Chain =
3536  DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3537  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3538  DAG.getRegisterMask(Mask), Chain.getValue(1));
3539  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3540 }
3541 
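// Windows on ARM computes a TLS address in four steps, mirrored by the code
// below: read the TEB via a CP15 MRC (coprocessor register c13), load
// ThreadLocalStoragePointer from TEB+0x2c, index that array by _tls_index to
// find this module's TLS block, then add the variable's SECREL offset within
// the .tls section.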
3542 SDValue
3543 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3544  SelectionDAG &DAG) const {
3545  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3546 
3547  SDValue Chain = DAG.getEntryNode();
3548  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3549  SDLoc DL(Op);
3550 
3551  // Load the current TEB (thread environment block)
3552  SDValue Ops[] = {Chain,
3553  DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3554  DAG.getTargetConstant(15, DL, MVT::i32),
3555  DAG.getTargetConstant(0, DL, MVT::i32),
3556  DAG.getTargetConstant(13, DL, MVT::i32),
3557  DAG.getTargetConstant(0, DL, MVT::i32),
3558  DAG.getTargetConstant(2, DL, MVT::i32)};
3559  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3560  DAG.getVTList(MVT::i32, MVT::Other), Ops);
3561 
3562  SDValue TEB = CurrentTEB.getValue(0);
3563  Chain = CurrentTEB.getValue(1);
3564 
3565  // Load the ThreadLocalStoragePointer from the TEB
3566  // A pointer to the TLS array is located at offset 0x2c from the TEB.
3567  SDValue TLSArray =
3568  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3569  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3570 
3571  // The pointer to the thread's TLS data area lives in the TLSArray at the
3572  // TLS index, scaled by 4 (i.e. at byte offset _tls_index * 4).
3573 
3574  // Load the TLS index from the C runtime
3575  SDValue TLSIndex =
3576  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3577  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3578  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3579 
3580  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3581  DAG.getConstant(2, DL, MVT::i32));
3582  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3583  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3584  MachinePointerInfo());
3585 
3586  // Get the offset of the start of the .tls section (section base)
3587  const auto *GA = cast<GlobalAddressSDNode>(Op);
3588  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3589  SDValue Offset = DAG.getLoad(
3590  PtrVT, DL, Chain,
3591  DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3592  DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3593  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3594 
3595  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3596 }
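// For illustration, the Windows lowering above computes roughly the following
// (a sketch; the variable names are illustrative, only _tls_index is a real
// symbol, and the TEB pointer is read with "mrc p15, 0, <Rt>, c13, c0, 2"):
//
//   char **TLSArray = *(char ***)(TEB + 0x2c); // ThreadLocalStoragePointer
//   char  *TLSBase  = TLSArray[_tls_index];    // this module's TLS block
//   return TLSBase + SECREL(var);              // var's offset within .tls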
3597 
3598 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
3599 SDValue
3600 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3601  SelectionDAG &DAG) const {
3602  SDLoc dl(GA);
3603  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3604  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3605  MachineFunction &MF = DAG.getMachineFunction();
3606  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3607  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3608  ARMConstantPoolValue *CPV =
3609  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3610  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3611  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3612  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3613  Argument = DAG.getLoad(
3614  PtrVT, dl, DAG.getEntryNode(), Argument,
3615  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3616  SDValue Chain = Argument.getValue(1);
3617 
3618  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3619  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3620 
3621  // call __tls_get_addr.
3622  ArgListTy Args;
3623  ArgListEntry Entry;
3624  Entry.Node = Argument;
3625  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3626  Args.push_back(Entry);
3627 
3628  // FIXME: is there useful debug info available here?
3629  TargetLowering::CallLoweringInfo CLI(DAG);
3630  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3631  CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3632  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3633 
3634  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3635  return CallResult.first;
3636 }
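// For illustration, the general-dynamic lowering above is roughly equivalent
// to (a sketch; "var(tlsgd)" denotes the TLSGD constant-pool entry built
// above, and pic_add is the ARMISD::PIC_ADD fixup):
//
//   arg  = pic_add(load(constpool(var(tlsgd))), pc)  // PC-relative argument
//   addr = __tls_get_addr(arg)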
3637 
3638 // Lower ISD::GlobalTLSAddress using the "initial exec" or
3639 // "local exec" model.
3640 SDValue
3641 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3642  SelectionDAG &DAG,
3643  TLSModel::Model model) const {
3644  const GlobalValue *GV = GA->getGlobal();
3645  SDLoc dl(GA);
3646  SDValue Offset;
3647  SDValue Chain = DAG.getEntryNode();
3648  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3649  // Get the Thread Pointer
3650  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3651 
3652  if (model == TLSModel::InitialExec) {
3653  MachineFunction &MF = DAG.getMachineFunction();
3654  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3655  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3656  // Initial exec model.
3657  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3658  ARMConstantPoolValue *CPV =
3659  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3660  ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3661  true);
3662  Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3663  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3664  Offset = DAG.getLoad(
3665  PtrVT, dl, Chain, Offset,
3666  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3667  Chain = Offset.getValue(1);
3668 
3669  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3670  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3671 
3672  Offset = DAG.getLoad(
3673  PtrVT, dl, Chain, Offset,
3674  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3675  } else {
3676  // local exec model
3677  assert(model == TLSModel::LocalExec);
3678  ARMConstantPoolValue *CPV =
3679  ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3680  Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3681  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3682  Offset = DAG.getLoad(
3683  PtrVT, dl, Chain, Offset,
3684  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3685  }
3686 
3687  // The address of the thread local variable is the add of the thread
3688  // pointer with the offset of the variable.
3689  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3690 }
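// For illustration, the two exec models handled above compute (a sketch; "TP"
// is the value produced by ARMISD::THREAD_POINTER and pic_add is the
// ARMISD::PIC_ADD fixup):
//
//   initial exec: addr = TP + load(pic_add(load(constpool(var(gottpoff))), pc))
//   local exec:   addr = TP + load(constpool(var(tpoff)))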
3691 
3692 SDValue
3693 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3694  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3695  if (DAG.getTarget().useEmulatedTLS())
3696  return LowerToTLSEmulatedModel(GA, DAG);
3697 
3698  if (Subtarget->isTargetDarwin())
3699  return LowerGlobalTLSAddressDarwin(Op, DAG);
3700 
3701  if (Subtarget->isTargetWindows())
3702  return LowerGlobalTLSAddressWindows(Op, DAG);
3703 
3704  // TODO: implement the "local dynamic" model
3705  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3706  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3707 
3708  switch (model) {
3709  case TLSModel::GeneralDynamic:
3710  case TLSModel::LocalDynamic:
3711  return LowerToTLSGeneralDynamicModel(GA, DAG);
3712  case TLSModel::InitialExec:
3713  case TLSModel::LocalExec:
3714  return LowerToTLSExecModels(GA, DAG, model);
3715  }
3716  llvm_unreachable("bogus TLS model");
3717 }
3718 
3719 /// Return true if all users of V are within function F, looking through
3720 /// ConstantExprs.
3721 static bool allUsersAreInFunction(const Value *V, const Function *F) {
3722  SmallVector<const User*,4> Worklist(V->users());
3723  while (!Worklist.empty()) {
3724  auto *U = Worklist.pop_back_val();
3725  if (isa<ConstantExpr>(U)) {
3726  append_range(Worklist, U->users());
3727  continue;
3728  }
3729 
3730  auto *I = dyn_cast<Instruction>(U);
3731  if (!I || I->getParent()->getParent() != F)
3732  return false;
3733  }
3734  return true;
3735 }
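// Example of the "looking through ConstantExprs" behaviour (illustrative IR,
// not taken from a test):
//
//   @g = internal global [2 x i32] zeroinitializer
//   define void @f() {
//     store i32 1, i32* getelementptr inbounds ([2 x i32], [2 x i32]* @g, i32 0, i32 1)
//     ret void
//   }
//
// The getelementptr ConstantExpr user of @g is pushed onto the worklist and
// looked through; its only user is the store instruction inside @f, so the
// helper returns true for (V = @g, F = @f).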
3736 
3737 static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3738  const GlobalValue *GV, SelectionDAG &DAG,
3739  EVT PtrVT, const SDLoc &dl) {
3740  // If we're creating a pool entry for a constant global with unnamed address,
3741  // and the global is small enough, we can emit it inline into the constant pool
3742  // to save ourselves an indirection.
3743  //
3744  // This is a win if the constant is only used in one function (so it doesn't
3745  // need to be duplicated) or duplicating the constant wouldn't increase code
3746  // size (implying the constant is no larger than 4 bytes).
3747  const Function &F = DAG.getMachineFunction().getFunction();
3748 
3749  // We rely on this decision to inline being idempotent and unrelated to the
3750  // use-site. We know that if we inline a variable at one use site, we'll
3751  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3752  // doesn't know about this optimization, so bail out if it's enabled;
3753  // otherwise we could decide to inline here (and thus never emit the GV)
3754  // while fast-isel generated code still requires the GV.