ARMISelLowering.cpp
1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that ARM uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMISelLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMCallingConv.h"
18 #include "ARMConstantPoolValue.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMPerfectShuffle.h"
21 #include "ARMRegisterInfo.h"
22 #include "ARMSelectionDAGInfo.h"
23 #include "ARMSubtarget.h"
24 #include "ARMTargetTransformInfo.h"
27 #include "Utils/ARMBaseInfo.h"
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/BitVector.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallVector.h"
36 #include "llvm/ADT/Statistic.h"
37 #include "llvm/ADT/StringExtras.h"
38 #include "llvm/ADT/StringRef.h"
39 #include "llvm/ADT/StringSwitch.h"
40 #include "llvm/ADT/Triple.h"
41 #include "llvm/ADT/Twine.h"
66 #include "llvm/IR/Attributes.h"
67 #include "llvm/IR/CallingConv.h"
68 #include "llvm/IR/Constant.h"
69 #include "llvm/IR/Constants.h"
70 #include "llvm/IR/DataLayout.h"
71 #include "llvm/IR/DebugLoc.h"
72 #include "llvm/IR/DerivedTypes.h"
73 #include "llvm/IR/Function.h"
74 #include "llvm/IR/GlobalAlias.h"
75 #include "llvm/IR/GlobalValue.h"
76 #include "llvm/IR/GlobalVariable.h"
77 #include "llvm/IR/IRBuilder.h"
78 #include "llvm/IR/InlineAsm.h"
79 #include "llvm/IR/Instruction.h"
80 #include "llvm/IR/Instructions.h"
81 #include "llvm/IR/IntrinsicInst.h"
82 #include "llvm/IR/Intrinsics.h"
83 #include "llvm/IR/IntrinsicsARM.h"
84 #include "llvm/IR/Module.h"
85 #include "llvm/IR/PatternMatch.h"
86 #include "llvm/IR/Type.h"
87 #include "llvm/IR/User.h"
88 #include "llvm/IR/Value.h"
89 #include "llvm/MC/MCInstrDesc.h"
91 #include "llvm/MC/MCRegisterInfo.h"
92 #include "llvm/MC/MCSchedule.h"
95 #include "llvm/Support/Casting.h"
96 #include "llvm/Support/CodeGen.h"
98 #include "llvm/Support/Compiler.h"
99 #include "llvm/Support/Debug.h"
101 #include "llvm/Support/KnownBits.h"
103 #include "llvm/Support/MathExtras.h"
107 #include <algorithm>
108 #include <cassert>
109 #include <cstdint>
110 #include <cstdlib>
111 #include <iterator>
112 #include <limits>
113 #include <string>
114 #include <tuple>
115 #include <utility>
116 #include <vector>
117 
118 using namespace llvm;
119 using namespace llvm::PatternMatch;
120 
121 #define DEBUG_TYPE "arm-isel"
122 
123 STATISTIC(NumTailCalls, "Number of tail calls");
124 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
125 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
126 STATISTIC(NumConstpoolPromoted,
127  "Number of constants with their storage promoted into constant pools");
128 
129 static cl::opt<bool>
130 ARMInterworking("arm-interworking", cl::Hidden,
131  cl::desc("Enable / disable ARM interworking (for debugging only)"),
132  cl::init(true));
133 
134 static cl::opt<bool> EnableConstpoolPromotion(
135  "arm-promote-constant", cl::Hidden,
136  cl::desc("Enable / disable promotion of unnamed_addr constants into "
137  "constant pools"),
138  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
139 static cl::opt<unsigned> ConstpoolPromotionMaxSize(
140  "arm-promote-constant-max-size", cl::Hidden,
141  cl::desc("Maximum size of constant to promote into a constant pool"),
142  cl::init(64));
143 static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
144  "arm-promote-constant-max-total", cl::Hidden,
145  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
146  cl::init(128));
147 
148 cl::opt<unsigned>
149 MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
150  cl::desc("Maximum interleave factor for MVE VLDn to generate."),
151  cl::init(2));
152 
153 // The APCS parameter registers.
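// Both APCS and AAPCS pass the first four integer/pointer arguments in
// r0-r3; this array is indexed when those registers have to be saved to the
// stack, e.g. for byval and variadic argument lowering.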
154 static const MCPhysReg GPRArgRegs[] = {
155  ARM::R0, ARM::R1, ARM::R2, ARM::R3
156 };
157 
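// Register the operation actions for a NEON vector type. 'Legal' nodes are
// selected as-is, 'Promote' reroutes loads/stores through the wider
// PromotedLdStVT, 'Expand' has the legalizer break the node into simpler
// operations (or a libcall), and 'Custom' sends it through
// ARMTargetLowering::LowerOperation.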
158 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
159  if (VT != PromotedLdStVT) {
160  setOperationAction(ISD::LOAD, VT, Promote);
161  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
162 
163  setOperationAction(ISD::STORE, VT, Promote);
164  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
165  }
166 
167  MVT ElemTy = VT.getVectorElementType();
168  if (ElemTy != MVT::f64)
169  setOperationAction(ISD::SETCC, VT, Custom);
170  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
171  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
172  if (ElemTy == MVT::i32) {
173  setOperationAction(ISD::SINT_TO_FP, VT, Custom);
174  setOperationAction(ISD::UINT_TO_FP, VT, Custom);
175  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
176  setOperationAction(ISD::FP_TO_UINT, VT, Custom);
177  } else {
178  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
179  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
180  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
181  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
182  }
183  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
184  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
185  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
186  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
187  setOperationAction(ISD::SELECT, VT, Expand);
188  setOperationAction(ISD::SELECT_CC, VT, Expand);
189  setOperationAction(ISD::VSELECT, VT, Expand);
190  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
191  if (VT.isInteger()) {
192  setOperationAction(ISD::SHL, VT, Custom);
193  setOperationAction(ISD::SRA, VT, Custom);
194  setOperationAction(ISD::SRL, VT, Custom);
195  }
196 
197  // Neon does not support vector divide/remainder operations.
198  setOperationAction(ISD::SDIV, VT, Expand);
199  setOperationAction(ISD::UDIV, VT, Expand);
200  setOperationAction(ISD::FDIV, VT, Expand);
201  setOperationAction(ISD::SREM, VT, Expand);
202  setOperationAction(ISD::UREM, VT, Expand);
203  setOperationAction(ISD::FREM, VT, Expand);
204  setOperationAction(ISD::SDIVREM, VT, Expand);
205  setOperationAction(ISD::UDIVREM, VT, Expand);
206 
207  if (!VT.isFloatingPoint() &&
208  VT != MVT::v2i64 && VT != MVT::v1i64)
209  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
210  setOperationAction(Opcode, VT, Legal);
211  if (!VT.isFloatingPoint())
212  for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
213  setOperationAction(Opcode, VT, Legal);
214 }
215 
216 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
217  addRegisterClass(VT, &ARM::DPRRegClass);
218  addTypeForNEON(VT, MVT::f64);
219 }
220 
221 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
222  addRegisterClass(VT, &ARM::DPairRegClass);
223  addTypeForNEON(VT, MVT::v2f64);
224 }
225 
226 void ARMTargetLowering::setAllExpand(MVT VT) {
227  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
228  setOperationAction(Opc, VT, Expand);
229 
230  // We support these really simple operations even on types where all
231  // the actual arithmetic has to be broken down into simpler
232  // operations or turned into library calls.
233  setOperationAction(ISD::BITCAST, VT, Legal);
234  setOperationAction(ISD::LOAD, VT, Legal);
235  setOperationAction(ISD::STORE, VT, Legal);
236  setOperationAction(ISD::UNDEF, VT, Legal);
237 }
238 
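// Set the action for all three flavours of extending load (any-, zero- and
// sign-extend) in one go. Note the argument order matches setLoadExtAction:
// 'From' is the result value type and 'To' is the narrower in-memory type.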
239 void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
240  LegalizeAction Action) {
241  setLoadExtAction(ISD::EXTLOAD, From, To, Action);
242  setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
243  setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
244 }
245 
246 void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
247  const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
248 
249  for (auto VT : IntTypes) {
250  addRegisterClass(VT, &ARM::MQPRRegClass);
251  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
252  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
253  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
254  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
255  setOperationAction(ISD::SHL, VT, Custom);
256  setOperationAction(ISD::SRA, VT, Custom);
257  setOperationAction(ISD::SRL, VT, Custom);
258  setOperationAction(ISD::SMIN, VT, Legal);
259  setOperationAction(ISD::SMAX, VT, Legal);
260  setOperationAction(ISD::UMIN, VT, Legal);
261  setOperationAction(ISD::UMAX, VT, Legal);
262  setOperationAction(ISD::ABS, VT, Legal);
263  setOperationAction(ISD::SETCC, VT, Custom);
264  setOperationAction(ISD::MLOAD, VT, Custom);
265  setOperationAction(ISD::MSTORE, VT, Legal);
266  setOperationAction(ISD::CTLZ, VT, Legal);
267  setOperationAction(ISD::CTTZ, VT, Custom);
268  setOperationAction(ISD::BITREVERSE, VT, Legal);
269  setOperationAction(ISD::BSWAP, VT, Legal);
270  setOperationAction(ISD::SADDSAT, VT, Legal);
271  setOperationAction(ISD::UADDSAT, VT, Legal);
272  setOperationAction(ISD::SSUBSAT, VT, Legal);
273  setOperationAction(ISD::USUBSAT, VT, Legal);
274  setOperationAction(ISD::ABDS, VT, Legal);
275  setOperationAction(ISD::ABDU, VT, Legal);
276 
277  // No native support for these.
278  setOperationAction(ISD::UDIV, VT, Expand);
279  setOperationAction(ISD::SDIV, VT, Expand);
280  setOperationAction(ISD::UREM, VT, Expand);
281  setOperationAction(ISD::SREM, VT, Expand);
282  setOperationAction(ISD::UDIVREM, VT, Expand);
283  setOperationAction(ISD::SDIVREM, VT, Expand);
284  setOperationAction(ISD::CTPOP, VT, Expand);
285  setOperationAction(ISD::SELECT, VT, Expand);
286  setOperationAction(ISD::SELECT_CC, VT, Expand);
287 
288  // Vector reductions
289  setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
290  setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
291  setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
292  setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
293  setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
294  setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
295  setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
296  setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
297  setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
298 
299  if (!HasMVEFP) {
300  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
301  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
302  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
303  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
304  } else {
305  setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
306  setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
307  }
308 
309  // Pre and Post inc are supported on loads and stores
310  for (unsigned im = (unsigned)ISD::PRE_INC;
311  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
312  setIndexedLoadAction(im, VT, Legal);
313  setIndexedStoreAction(im, VT, Legal);
314  setIndexedMaskedLoadAction(im, VT, Legal);
315  setIndexedMaskedStoreAction(im, VT, Legal);
316  }
317  }
318 
319  const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
320  for (auto VT : FloatTypes) {
321  addRegisterClass(VT, &ARM::MQPRRegClass);
322  if (!HasMVEFP)
323  setAllExpand(VT);
324 
325  // These are legal or custom whether we have MVE.fp or not
326  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
327  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
328  setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
329  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
330  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
331  setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
332  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
333  setOperationAction(ISD::SETCC, VT, Custom);
334  setOperationAction(ISD::MLOAD, VT, Custom);
335  setOperationAction(ISD::MSTORE, VT, Legal);
336  setOperationAction(ISD::SELECT, VT, Expand);
337  setOperationAction(ISD::SELECT_CC, VT, Expand);
338 
339  // Pre and Post inc are supported on loads and stores
340  for (unsigned im = (unsigned)ISD::PRE_INC;
341  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
342  setIndexedLoadAction(im, VT, Legal);
343  setIndexedStoreAction(im, VT, Legal);
344  setIndexedMaskedLoadAction(im, VT, Legal);
345  setIndexedMaskedStoreAction(im, VT, Legal);
346  }
347 
348  if (HasMVEFP) {
349  setOperationAction(ISD::FMINNUM, VT, Legal);
350  setOperationAction(ISD::FMAXNUM, VT, Legal);
351  setOperationAction(ISD::FROUND, VT, Legal);
352  setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
353  setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
354  setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
355  setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
356 
357  // No native support for these.
358  setOperationAction(ISD::FDIV, VT, Expand);
359  setOperationAction(ISD::FREM, VT, Expand);
360  setOperationAction(ISD::FSQRT, VT, Expand);
361  setOperationAction(ISD::FSIN, VT, Expand);
362  setOperationAction(ISD::FCOS, VT, Expand);
363  setOperationAction(ISD::FPOW, VT, Expand);
364  setOperationAction(ISD::FLOG, VT, Expand);
365  setOperationAction(ISD::FLOG2, VT, Expand);
366  setOperationAction(ISD::FLOG10, VT, Expand);
367  setOperationAction(ISD::FEXP, VT, Expand);
368  setOperationAction(ISD::FEXP2, VT, Expand);
369  setOperationAction(ISD::FNEARBYINT, VT, Expand);
370  }
371  }
372 
 373  // Custom-expand smaller-than-legal vector reductions to prevent false zero
 374  // items from being added.
375  setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);
376  setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);
377  setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);
378  setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);
379  setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
380  setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);
381  setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
382  setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);
383 
 384  // We 'support' these types up to bitcast/load/store level, regardless of
 385  // MVE integer-only / float support; only FP data processing on the FP
 386  // vector types is inhibited at the integer-only level.
387  const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
388  for (auto VT : LongTypes) {
389  addRegisterClass(VT, &ARM::MQPRRegClass);
390  setAllExpand(VT);
391  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
392  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
393  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
394  }
395  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
396 
397  // We can do bitwise operations on v2i64 vectors
398  setOperationAction(ISD::AND, MVT::v2i64, Legal);
399  setOperationAction(ISD::OR, MVT::v2i64, Legal);
400  setOperationAction(ISD::XOR, MVT::v2i64, Legal);
401 
402  // It is legal to extload from v4i8 to v4i16 or v4i32.
403  addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
404  addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
405  addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
406 
407  // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
408  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
409  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
410  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
411  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
412  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
413 
414  // Some truncating stores are legal too.
415  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
416  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
417  setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
418 
419  // Pre and Post inc on these are legal, given the correct extends
420  for (unsigned im = (unsigned)ISD::PRE_INC;
421  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
422  for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
423  setIndexedLoadAction(im, VT, Legal);
424  setIndexedStoreAction(im, VT, Legal);
425  setIndexedMaskedLoadAction(im, VT, Legal);
426  setIndexedMaskedStoreAction(im, VT, Legal);
427  }
428  }
429 
430  // Predicate types
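  // MVE predicate vectors are bit masks held in the VPR (P0) register,
  // modelled by the VCCR register class, rather than in ordinary vector
  // registers; that is why most operations on them below are Custom-lowered.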
431  const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1};
432  for (auto VT : pTypes) {
433  addRegisterClass(VT, &ARM::VCCRRegClass);
434  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
435  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
436  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
437  setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
438  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
439  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
440  setOperationAction(ISD::SETCC, VT, Custom);
441  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
442  setOperationAction(ISD::LOAD, VT, Custom);
443  setOperationAction(ISD::STORE, VT, Custom);
444  setOperationAction(ISD::TRUNCATE, VT, Custom);
445  setOperationAction(ISD::VSELECT, VT, Expand);
446  setOperationAction(ISD::SELECT, VT, Expand);
447  }
448  setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
449  setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
450  setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
451  setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
452  setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
453  setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
454  setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
455  setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
456 }
457 
458 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
459  const ARMSubtarget &STI)
460  : TargetLowering(TM), Subtarget(&STI) {
461  RegInfo = Subtarget->getRegisterInfo();
462  Itins = Subtarget->getInstrItineraryData();
463 
466 
467  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
468  !Subtarget->isTargetWatchOS()) {
469  bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
470  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
471  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
 472  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
 473  : CallingConv::ARM_AAPCS);
 474  }
475 
476  if (Subtarget->isTargetMachO()) {
477  // Uses VFP for Thumb libfuncs if available.
478  if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
479  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
480  static const struct {
481  const RTLIB::Libcall Op;
482  const char * const Name;
483  const ISD::CondCode Cond;
484  } LibraryCalls[] = {
485  // Single-precision floating-point arithmetic.
486  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
487  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
488  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
489  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
490 
491  // Double-precision floating-point arithmetic.
492  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
493  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
494  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
495  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
496 
497  // Single-precision comparisons.
498  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
499  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
500  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
501  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
502  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
503  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
504  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
505 
506  // Double-precision comparisons.
507  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
508  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
509  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
510  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
511  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
512  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
513  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
514 
515  // Floating-point to integer conversions.
516  // i64 conversions are done via library routines even when generating VFP
517  // instructions, so use the same ones.
518  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
519  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
520  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
521  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
522 
523  // Conversions between floating types.
524  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
525  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
526 
527  // Integer to floating-point conversions.
528  // i64 conversions are done via library routines even when generating VFP
529  // instructions, so use the same ones.
530  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
531  // e.g., __floatunsidf vs. __floatunssidfvfp.
532  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
533  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
534  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
535  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
536  };
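  // For the comparison helpers, Cond records how the integer result returned
  // in r0 must be tested against zero to recover the original predicate;
  // entries with SETCC_INVALID are ordinary value-returning helpers.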
537 
538  for (const auto &LC : LibraryCalls) {
539  setLibcallName(LC.Op, LC.Name);
540  if (LC.Cond != ISD::SETCC_INVALID)
541  setCmpLibcallCC(LC.Op, LC.Cond);
542  }
543  }
544  }
545 
546  // These libcalls are not available in 32-bit.
547  setLibcallName(RTLIB::SHL_I128, nullptr);
548  setLibcallName(RTLIB::SRL_I128, nullptr);
549  setLibcallName(RTLIB::SRA_I128, nullptr);
550  setLibcallName(RTLIB::MUL_I128, nullptr);
551  setLibcallName(RTLIB::MULO_I64, nullptr);
552  setLibcallName(RTLIB::MULO_I128, nullptr);
553 
554  // RTLIB
555  if (Subtarget->isAAPCS_ABI() &&
556  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
557  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
558  static const struct {
559  const RTLIB::Libcall Op;
560  const char * const Name;
561  const CallingConv::ID CC;
562  const ISD::CondCode Cond;
563  } LibraryCalls[] = {
564  // Double-precision floating-point arithmetic helper functions
565  // RTABI chapter 4.1.2, Table 2
566  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
567  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
568  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
569  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
570 
571  // Double-precision floating-point comparison helper functions
572  // RTABI chapter 4.1.2, Table 3
573  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
574  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
575  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
576  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
577  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
578  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
579  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
580 
581  // Single-precision floating-point arithmetic helper functions
582  // RTABI chapter 4.1.2, Table 4
583  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
584  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
585  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
586  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
587 
588  // Single-precision floating-point comparison helper functions
589  // RTABI chapter 4.1.2, Table 5
590  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
591  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
592  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
593  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
594  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
595  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
596  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
597 
598  // Floating-point to integer conversions.
599  // RTABI chapter 4.1.2, Table 6
600  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
601  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
602  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
603  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
604  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
605  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
606  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
607  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
608 
609  // Conversions between floating types.
610  // RTABI chapter 4.1.2, Table 7
611  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
612  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
613  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
614 
615  // Integer to floating-point conversions.
616  // RTABI chapter 4.1.2, Table 8
617  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
618  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
619  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
620  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
621  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
622  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
623  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
624  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
625 
626  // Long long helper functions
627  // RTABI chapter 4.2, Table 9
628  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
629  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
630  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
631  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632 
633  // Integer division functions
634  // RTABI chapter 4.3.1
635  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
636  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
639  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
640  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
641  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
642  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
643  };
644 
645  for (const auto &LC : LibraryCalls) {
646  setLibcallName(LC.Op, LC.Name);
647  setLibcallCallingConv(LC.Op, LC.CC);
648  if (LC.Cond != ISD::SETCC_INVALID)
649  setCmpLibcallCC(LC.Op, LC.Cond);
650  }
651 
652  // EABI dependent RTLIB
653  if (TM.Options.EABIVersion == EABI::EABI4 ||
654  TM.Options.EABIVersion == EABI::EABI5) {
655  static const struct {
656  const RTLIB::Libcall Op;
657  const char *const Name;
658  const CallingConv::ID CC;
659  const ISD::CondCode Cond;
660  } MemOpsLibraryCalls[] = {
661  // Memory operations
662  // RTABI chapter 4.3.4
 663  { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
 664  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
665  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
666  };
667 
668  for (const auto &LC : MemOpsLibraryCalls) {
669  setLibcallName(LC.Op, LC.Name);
670  setLibcallCallingConv(LC.Op, LC.CC);
671  if (LC.Cond != ISD::SETCC_INVALID)
672  setCmpLibcallCC(LC.Op, LC.Cond);
673  }
674  }
675  }
676 
677  if (Subtarget->isTargetWindows()) {
678  static const struct {
679  const RTLIB::Libcall Op;
680  const char * const Name;
681  const CallingConv::ID CC;
682  } LibraryCalls[] = {
683  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
684  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
685  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
686  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
687  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
688  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
689  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
690  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
691  };
692 
693  for (const auto &LC : LibraryCalls) {
694  setLibcallName(LC.Op, LC.Name);
695  setLibcallCallingConv(LC.Op, LC.CC);
696  }
697  }
698 
699  // Use divmod compiler-rt calls for iOS 5.0 and later.
700  if (Subtarget->isTargetMachO() &&
701  !(Subtarget->isTargetIOS() &&
702  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
703  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
704  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
705  }
706 
707  // The half <-> float conversion functions are always soft-float on
708  // non-watchos platforms, but are needed for some targets which use a
709  // hard-float calling convention by default.
710  if (!Subtarget->isTargetWatchABI()) {
711  if (Subtarget->isAAPCS_ABI()) {
712  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
713  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
714  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
715  } else {
716  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
717  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
718  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
719  }
720  }
721 
722  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
723  // a __gnu_ prefix (which is the default).
724  if (Subtarget->isTargetAEABI()) {
725  static const struct {
726  const RTLIB::Libcall Op;
727  const char * const Name;
728  const CallingConv::ID CC;
729  } LibraryCalls[] = {
730  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
731  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
732  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
733  };
734 
735  for (const auto &LC : LibraryCalls) {
736  setLibcallName(LC.Op, LC.Name);
737  setLibcallCallingConv(LC.Op, LC.CC);
738  }
739  }
740 
741  if (Subtarget->isThumb1Only())
742  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
743  else
744  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
745 
746  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
747  Subtarget->hasFPRegs()) {
748  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
749  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
750 
755 
756  if (!Subtarget->hasVFP2Base())
757  setAllExpand(MVT::f32);
758  if (!Subtarget->hasFP64())
759  setAllExpand(MVT::f64);
760  }
761 
762  if (Subtarget->hasFullFP16()) {
763  addRegisterClass(MVT::f16, &ARM::HPRRegClass);
766 
769  }
770 
771  if (Subtarget->hasBF16()) {
772  addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
773  setAllExpand(MVT::bf16);
774  if (!Subtarget->hasFullFP16())
776  }
777 
778  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
779  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
780  setTruncStoreAction(VT, InnerVT, Expand);
781  addAllExtLoads(VT, InnerVT, Expand);
782  }
783 
786 
788  }
789 
792 
795 
796  if (Subtarget->hasMVEIntegerOps())
797  addMVEVectorTypes(Subtarget->hasMVEFloatOps());
798 
799  // Combine low-overhead loop intrinsics so that we can lower i1 types.
800  if (Subtarget->hasLOB()) {
803  }
804 
805  if (Subtarget->hasNEON()) {
806  addDRTypeForNEON(MVT::v2f32);
807  addDRTypeForNEON(MVT::v8i8);
808  addDRTypeForNEON(MVT::v4i16);
809  addDRTypeForNEON(MVT::v2i32);
810  addDRTypeForNEON(MVT::v1i64);
811 
812  addQRTypeForNEON(MVT::v4f32);
813  addQRTypeForNEON(MVT::v2f64);
814  addQRTypeForNEON(MVT::v16i8);
815  addQRTypeForNEON(MVT::v8i16);
816  addQRTypeForNEON(MVT::v4i32);
817  addQRTypeForNEON(MVT::v2i64);
818 
819  if (Subtarget->hasFullFP16()) {
820  addQRTypeForNEON(MVT::v8f16);
821  addDRTypeForNEON(MVT::v4f16);
822  }
823 
824  if (Subtarget->hasBF16()) {
825  addQRTypeForNEON(MVT::v8bf16);
826  addDRTypeForNEON(MVT::v4bf16);
827  }
828  }
829 
830  if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
831  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
832  // none of Neon, MVE or VFP supports any arithmetic operations on it.
836  // FIXME: Code duplication: FDIV and FREM are expanded always, see
837  // ARMTargetLowering::addTypeForNEON method for details.
840  // FIXME: Create unittest.
 841  // In other words, find a case where "copysign" appears in the DAG with
 842  // vector operands.
844  // FIXME: Code duplication: SETCC has custom operation action, see
845  // ARMTargetLowering::addTypeForNEON method for details.
847  // FIXME: Create unittest for FNEG and for FABS.
859  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
866  }
867 
868  if (Subtarget->hasNEON()) {
869  // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
870  // supported for v4f32.
885 
886  // Mark v2f32 intrinsics.
901 
902  // Neon does not support some operations on v1i64 and v2i64 types.
904  // Custom handling for some quad-vector types to detect VMULL.
908  // Custom handling for some vector types to avoid expensive expansions
913  // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
 914  // a destination type that is wider than the source, nor does
 915  // it have an FP_TO_[SU]INT instruction with a narrower destination than
916  // source.
925 
928 
929  // NEON does not have single instruction CTPOP for vectors with element
930  // types wider than 8-bits. However, custom lowering can leverage the
931  // v8i8/v16i8 vcnt instruction.
938 
941 
942  // NEON does not have single instruction CTTZ for vectors.
947 
952 
957 
962 
963  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
966  }
967 
968  // NEON only has FMA instructions as of VFP4.
969  if (!Subtarget->hasVFP4Base()) {
972  }
973 
981 
982  // It is legal to extload from v4i8 to v4i16 or v4i32.
984  MVT::v2i32}) {
989  }
990  }
991  }
992 
993  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1010  }
1011  if (Subtarget->hasMVEIntegerOps()) {
1019  }
1020 
1021  if (!Subtarget->hasFP64()) {
1022  // When targeting a floating-point unit with only single-precision
1023  // operations, f64 is legal for the few double-precision instructions which
 1024  // are present. However, no double-precision operations other than moves,
1025  // loads and stores are provided by the hardware.
1062  }
1063 
1064  if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1067  if (Subtarget->hasFullFP16()) {
1070  }
1071  }
1072 
1073  if (!Subtarget->hasFP16()) {
1076  }
1077 
1079 
1080  // ARM does not have floating-point extending loads.
1081  for (MVT VT : MVT::fp_valuetypes()) {
1084  }
1085 
1086  // ... or truncating stores
1090 
1091  // ARM does not have i1 sign extending load.
1092  for (MVT VT : MVT::integer_valuetypes())
1094 
1095  // ARM supports all 4 flavors of integer indexed load / store.
1096  if (!Subtarget->isThumb1Only()) {
1097  for (unsigned im = (unsigned)ISD::PRE_INC;
1098  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1107  }
1108  } else {
1109  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1112  }
1113 
1118 
1121  if (Subtarget->hasDSP()) {
1130  }
1131  if (Subtarget->hasBaseDSP()) {
1134  }
1135 
1136  // i64 operation support.
1139  if (Subtarget->isThumb1Only()) {
1142  }
1143  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1144  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1146 
1156 
1157  // MVE lowers 64 bit shifts to lsll and lsrl
1158  // assuming that ISD::SRL and SRA of i64 are already marked custom
1159  if (Subtarget->hasMVEIntegerOps())
1161 
1162  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1163  if (Subtarget->isThumb1Only()) {
1167  }
1168 
1169  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1171 
1172  // ARM does not have ROTL.
1174  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1177  }
1180  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1183  }
1184 
1185  // @llvm.readcyclecounter requires the Performance Monitors extension.
1186  // Default to the 0 expansion on unsupported platforms.
1187  // FIXME: Technically there are older ARM CPUs that have
1188  // implementation-specific ways of obtaining this information.
1189  if (Subtarget->hasPerfMon())
1191 
1192  // Only ARMv6 has BSWAP.
1193  if (!Subtarget->hasV6Ops())
1195 
1196  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1197  : Subtarget->hasDivideInARMMode();
1198  if (!hasDivide) {
1199  // These are expanded into libcalls if the cpu doesn't have HW divider.
1202  }
1203 
1204  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1207 
1210  }
1211 
1214 
1215  // Register based DivRem for AEABI (RTABI 4.2)
1216  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1217  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1218  Subtarget->isTargetWindows()) {
1221  HasStandaloneRem = false;
1222 
1223  if (Subtarget->isTargetWindows()) {
1224  const struct {
1225  const RTLIB::Libcall Op;
1226  const char * const Name;
1227  const CallingConv::ID CC;
1228  } LibraryCalls[] = {
1229  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1230  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1231  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1232  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1233 
1234  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1235  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1236  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1237  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1238  };
1239 
1240  for (const auto &LC : LibraryCalls) {
1241  setLibcallName(LC.Op, LC.Name);
1242  setLibcallCallingConv(LC.Op, LC.CC);
1243  }
1244  } else {
1245  const struct {
1246  const RTLIB::Libcall Op;
1247  const char * const Name;
1248  const CallingConv::ID CC;
1249  } LibraryCalls[] = {
1250  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1251  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1252  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1253  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1254 
1255  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1256  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1257  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1258  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1259  };
1260 
1261  for (const auto &LC : LibraryCalls) {
1262  setLibcallName(LC.Op, LC.Name);
1263  setLibcallCallingConv(LC.Op, LC.CC);
1264  }
1265  }
1266 
1271  } else {
1274  }
1275 
1276  if (Subtarget->getTargetTriple().isOSMSVCRT()) {
1277  // MSVCRT doesn't have powi; fall back to pow
1278  setLibcallName(RTLIB::POWI_F32, nullptr);
1279  setLibcallName(RTLIB::POWI_F64, nullptr);
1280  }
1281 
1286 
1289 
1290  // Use the default implementation.
1297 
1298  if (Subtarget->isTargetWindows())
1300  else
1302 
1303  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1304  // the default expansion.
1305  InsertFencesForAtomic = false;
1306  if (Subtarget->hasAnyDataBarrier() &&
1307  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1308  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1309  // to ldrex/strex loops already.
1311  if (!Subtarget->isThumb() || !Subtarget->isMClass())
1313 
1314  // On v8, we have particularly efficient implementations of atomic fences
1315  // if they can be combined with nearby atomic loads and stores.
1316  if (!Subtarget->hasAcquireRelease() ||
1317  getTargetMachine().getOptLevel() == 0) {
1318  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1319  InsertFencesForAtomic = true;
1320  }
1321  } else {
1322  // If there's anything we can use as a barrier, go through custom lowering
1323  // for ATOMIC_FENCE.
 1324  // If the target has DMB in Thumb, fences can be inserted.
1325  if (Subtarget->hasDataBarrier())
1326  InsertFencesForAtomic = true;
1327 
1329  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1330 
1331  // Set them all for expansion, which will force libcalls.
1344  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1345  // Unordered/Monotonic case.
1346  if (!InsertFencesForAtomic) {
1349  }
1350  }
1351 
1353 
1354  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1355  if (!Subtarget->hasV6Ops()) {
1358  }
1360 
1361  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1362  !Subtarget->isThumb1Only()) {
1363  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1364  // iff target supports vfp2.
1368  }
1369 
1370  // We want to custom lower some of our intrinsics.
1375  if (Subtarget->useSjLjEH())
1376  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1377 
1387  if (Subtarget->hasFullFP16()) {
1391  }
1392 
1394 
1397  if (Subtarget->hasFullFP16())
1402 
1403  // We don't support sin/cos/fmod/copysign/pow
1412  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1413  !Subtarget->isThumb1Only()) {
1416  }
1419 
1420  if (!Subtarget->hasVFP4Base()) {
1423  }
1424 
1425  // Various VFP goodness
1426  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1427  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1428  if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1431  }
1432 
1433  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1434  if (!Subtarget->hasFP16()) {
1437  }
1438 
1439  // Strict floating-point comparisons need custom lowering.
1446  }
1447 
1448  // Use __sincos_stret if available.
1449  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1450  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1453  }
1454 
1455  // FP-ARMv8 implements a lot of rounding-like FP operations.
1456  if (Subtarget->hasFPARMv8Base()) {
1465  if (Subtarget->hasNEON()) {
1470  }
1471 
1472  if (Subtarget->hasFP64()) {
1481  }
1482  }
1483 
 1484  // FP16 operations often need to be promoted to call lib functions
1485  if (Subtarget->hasFullFP16()) {
1498 
1500  }
1501 
1502  if (Subtarget->hasNEON()) {
1503  // vmin and vmax aren't available in a scalar form, so we can use
1504  // a NEON instruction with an undef lane instead. This has a performance
1505  // penalty on some cores, so we don't do this unless we have been
1506  // asked to by the core tuning model.
1507  if (Subtarget->useNEONForSinglePrecisionFP()) {
1512  }
1517 
1518  if (Subtarget->hasFullFP16()) {
1523 
1528  }
1529  }
1530 
1531  // We have target-specific dag combine patterns for the following nodes:
1532  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1539 
1540  if (Subtarget->hasMVEIntegerOps())
1542 
1543  if (Subtarget->hasV6Ops())
1545  if (Subtarget->isThumb1Only())
1547 
1549 
1550  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1551  !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1553  else
1555 
1556  //// temporary - rewrite interface to use type
1557  MaxStoresPerMemset = 8;
1559  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1561  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1563 
1564  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1565  // are at least 4 bytes aligned.
1567 
1568  // Prefer likely predicted branches to selects on out-of-order cores.
1569  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1570 
1571  setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1572 
1573  setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1574 
1575  if (Subtarget->isThumb() || Subtarget->isThumb2())
1577 }
1578 
1579 bool ARMTargetLowering::useSoftFloat() const {
 1580  return Subtarget->useSoftFloat();
1581 }
1582 
1583 // FIXME: It might make sense to define the representative register class as the
1584 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1585 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1586 // SPR's representative would be DPR_VFP2. This should work well if register
1587 // pressure tracking were modified such that a register use would increment the
1588 // pressure of the register class's representative and all of its super
1589 // classes' representatives transitively. We have not implemented this because
1590 // of the difficulty prior to coalescing of modeling operand register classes
1591 // due to the common occurrence of cross class copies and subregister insertions
1592 // and extractions.
1593 std::pair<const TargetRegisterClass *, uint8_t>
1594 ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
 1595  MVT VT) const {
1596  const TargetRegisterClass *RRC = nullptr;
1597  uint8_t Cost = 1;
1598  switch (VT.SimpleTy) {
 1599  default:
 1600  return TargetLowering::findRepresentativeClass(TRI, VT);
 1601  // Use DPR as the representative register class for all floating-point
 1602  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
 1603  // the cost is 1 for both f32 and f64.
1604  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1605  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1606  RRC = &ARM::DPRRegClass;
1607  // When NEON is used for SP, only half of the register file is available
1608  // because operations that define both SP and DP results will be constrained
1609  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1610  // coalescing by double-counting the SP regs. See the FIXME above.
1611  if (Subtarget->useNEONForSinglePrecisionFP())
1612  Cost = 2;
1613  break;
1614  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1615  case MVT::v4f32: case MVT::v2f64:
1616  RRC = &ARM::DPRRegClass;
1617  Cost = 2;
1618  break;
1619  case MVT::v4i64:
1620  RRC = &ARM::DPRRegClass;
1621  Cost = 4;
1622  break;
1623  case MVT::v8i64:
1624  RRC = &ARM::DPRRegClass;
1625  Cost = 8;
1626  break;
1627  }
1628  return std::make_pair(RRC, Cost);
1629 }
1630 
1631 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1632 #define MAKE_CASE(V) \
1633  case V: \
1634  return #V;
1635  switch ((ARMISD::NodeType)Opcode) {
1636  case ARMISD::FIRST_NUMBER:
1637  break;
1840 #undef MAKE_CASE
1841  }
1842  return nullptr;
1843 }
1844 
1845 EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
 1846  EVT VT) const {
1847  if (!VT.isVector())
1848  return getPointerTy(DL);
1849 
1850  // MVE has a predicate register.
1851  if ((Subtarget->hasMVEIntegerOps() &&
1852  (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8)) ||
1853  (Subtarget->hasMVEFloatOps() && (VT == MVT::v4f32 || VT == MVT::v8f16)))
 1854  return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
 1855  return VT.changeVectorElementTypeToInteger();
 1856 }
1857 
1858 /// getRegClassFor - Return the register class that should be used for the
1859 /// specified value type.
1860 const TargetRegisterClass *
1861 ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1862  (void)isDivergent;
1863  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1864  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1865  // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1866  // MVE Q registers.
1867  if (Subtarget->hasNEON()) {
1868  if (VT == MVT::v4i64)
1869  return &ARM::QQPRRegClass;
1870  if (VT == MVT::v8i64)
1871  return &ARM::QQQQPRRegClass;
1872  }
1873  if (Subtarget->hasMVEIntegerOps()) {
1874  if (VT == MVT::v4i64)
1875  return &ARM::MQQPRRegClass;
1876  if (VT == MVT::v8i64)
1877  return &ARM::MQQQQPRRegClass;
1878  }
1879  return TargetLowering::getRegClassFor(VT);
1880 }
1881 
1882 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1883 // source/dest is aligned and the copy size is large enough. We therefore want
1884 // to align such objects passed to memory intrinsics.
1885 bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
 1886  unsigned &PrefAlign) const {
1887  if (!isa<MemIntrinsic>(CI))
1888  return false;
1889  MinSize = 8;
1890  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1891  // cycle faster than 4-byte aligned LDM.
1892  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1893  return true;
1894 }
1895 
1896 // Create a fast isel object.
1897 FastISel *
1898 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
 1899  const TargetLibraryInfo *libInfo) const {
1900  return ARM::createFastISel(funcInfo, libInfo);
1901 }
1902 
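// Prefer ILP scheduling for nodes that produce floating-point or vector
// values, and for machine nodes whose first result is available only after
// more than two cycles according to the itinerary (typically loads);
// everything else is scheduled to minimise register pressure.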
1903 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
 1904  unsigned NumVals = N->getNumValues();
1905  if (!NumVals)
1906  return Sched::RegPressure;
1907 
1908  for (unsigned i = 0; i != NumVals; ++i) {
1909  EVT VT = N->getValueType(i);
1910  if (VT == MVT::Glue || VT == MVT::Other)
1911  continue;
1912  if (VT.isFloatingPoint() || VT.isVector())
1913  return Sched::ILP;
1914  }
1915 
1916  if (!N->isMachineOpcode())
1917  return Sched::RegPressure;
1918 
 1919  // Loads are scheduled for latency even if the instruction itinerary
1920  // is not available.
1921  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1922  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1923 
1924  if (MCID.getNumDefs() == 0)
1925  return Sched::RegPressure;
1926  if (!Itins->isEmpty() &&
1927  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1928  return Sched::ILP;
1929 
1930  return Sched::RegPressure;
1931 }
1932 
1933 //===----------------------------------------------------------------------===//
1934 // Lowering Code
1935 //===----------------------------------------------------------------------===//
1936 
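// Helpers that recognise shift-by-16 patterns and 16-bit signed operands.
// They are used by the DAG combines that form the DSP halfword multiply
// instructions (SMULBB/SMULWB and friends), which operate on the top or
// bottom 16 bits of a register.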
1937 static bool isSRL16(const SDValue &Op) {
1938  if (Op.getOpcode() != ISD::SRL)
1939  return false;
1940  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1941  return Const->getZExtValue() == 16;
1942  return false;
1943 }
1944 
1945 static bool isSRA16(const SDValue &Op) {
1946  if (Op.getOpcode() != ISD::SRA)
1947  return false;
1948  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1949  return Const->getZExtValue() == 16;
1950  return false;
1951 }
1952 
1953 static bool isSHL16(const SDValue &Op) {
1954  if (Op.getOpcode() != ISD::SHL)
1955  return false;
1956  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1957  return Const->getZExtValue() == 16;
1958  return false;
1959 }
1960 
1961 // Check for a signed 16-bit value. We special-case SRA because it makes
1962 // things simpler when also looking for SRAs that aren't sign-extending a
1963 // smaller value. Without the check, we'd need to take extra care with
1964 // checking order for some operations.
1965 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1966  if (isSRA16(Op))
1967  return isSHL16(Op.getOperand(0));
1968  return DAG.ComputeNumSignBits(Op) == 17;
1969 }
1970 
1971 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
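/// Unsigned comparisons map to the carry-based HI/HS/LO/LS condition codes,
/// while the signed forms GT/GE/LT/LE test the N, Z and V flags.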
1972 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
 1973  switch (CC) {
1974  default: llvm_unreachable("Unknown condition code!");
1975  case ISD::SETNE: return ARMCC::NE;
1976  case ISD::SETEQ: return ARMCC::EQ;
1977  case ISD::SETGT: return ARMCC::GT;
1978  case ISD::SETGE: return ARMCC::GE;
1979  case ISD::SETLT: return ARMCC::LT;
1980  case ISD::SETLE: return ARMCC::LE;
1981  case ISD::SETUGT: return ARMCC::HI;
1982  case ISD::SETUGE: return ARMCC::HS;
1983  case ISD::SETULT: return ARMCC::LO;
1984  case ISD::SETULE: return ARMCC::LS;
1985  }
1986 }
1987 
1988 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
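/// Some unordered-aware predicates cannot be expressed as a single ARM
/// condition, so a second code is returned in CondCode2; callers emit an
/// extra conditional instruction when it is not ARMCC::AL.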
1989 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
 1990  ARMCC::CondCodes &CondCode2) {
1991  CondCode2 = ARMCC::AL;
1992  switch (CC) {
1993  default: llvm_unreachable("Unknown FP condition!");
1994  case ISD::SETEQ:
1995  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
1996  case ISD::SETGT:
1997  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1998  case ISD::SETGE:
1999  case ISD::SETOGE: CondCode = ARMCC::GE; break;
2000  case ISD::SETOLT: CondCode = ARMCC::MI; break;
2001  case ISD::SETOLE: CondCode = ARMCC::LS; break;
2002  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
2003  case ISD::SETO: CondCode = ARMCC::VC; break;
2004  case ISD::SETUO: CondCode = ARMCC::VS; break;
2005  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
2006  case ISD::SETUGT: CondCode = ARMCC::HI; break;
2007  case ISD::SETUGE: CondCode = ARMCC::PL; break;
2008  case ISD::SETLT:
2009  case ISD::SETULT: CondCode = ARMCC::LT; break;
2010  case ISD::SETLE:
2011  case ISD::SETULE: CondCode = ARMCC::LE; break;
2012  case ISD::SETNE:
2013  case ISD::SETUNE: CondCode = ARMCC::NE; break;
2014  }
2015 }
2016 
2017 //===----------------------------------------------------------------------===//
2018 // Calling Convention Implementation
2019 //===----------------------------------------------------------------------===//
2020 
2021 /// getEffectiveCallingConv - Get the effective calling convention, taking into
2022 /// account presence of floating point hardware and calling convention
2023 /// limitations, such as support for variadic functions.
2024 CallingConv::ID
2025 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
2026  bool isVarArg) const {
2027  switch (CC) {
2028  default:
2029  report_fatal_error("Unsupported calling convention");
2031  case CallingConv::ARM_APCS:
2032  case CallingConv::GHC:
2034  return CC;
2038  case CallingConv::Swift:
2041  case CallingConv::C:
2042  case CallingConv::Tail:
2043  if (!Subtarget->isAAPCS_ABI())
2044  return CallingConv::ARM_APCS;
2045  else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
2046  getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2047  !isVarArg)
2049  else
2050  return CallingConv::ARM_AAPCS;
2051  case CallingConv::Fast:
2053  if (!Subtarget->isAAPCS_ABI()) {
2054  if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2055  return CallingConv::Fast;
2056  return CallingConv::ARM_APCS;
2057  } else if (Subtarget->hasVFP2Base() &&
2058  !Subtarget->isThumb1Only() && !isVarArg)
2060  else
2061  return CallingConv::ARM_AAPCS;
2062  }
2063 }
2064 
2065 CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
 2066  bool isVarArg) const {
2067  return CCAssignFnForNode(CC, false, isVarArg);
2068 }
2069 
2070 CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
 2071  bool isVarArg) const {
2072  return CCAssignFnForNode(CC, true, isVarArg);
2073 }
2074 
2075 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2076 /// CallingConvention.
2077 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2078  bool Return,
2079  bool isVarArg) const {
2080  switch (getEffectiveCallingConv(CC, isVarArg)) {
2081  default:
2082  report_fatal_error("Unsupported calling convention");
2083  case CallingConv::ARM_APCS:
2084  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2086  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2088  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2089  case CallingConv::Fast:
2090  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2091  case CallingConv::GHC:
2092  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2094  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2096  return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2097  }
2098 }
2099 
2100 SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2101  MVT LocVT, MVT ValVT, SDValue Val) const {
2102  Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2103  Val);
2104  if (Subtarget->hasFullFP16()) {
2105  Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2106  } else {
2107  Val = DAG.getNode(ISD::TRUNCATE, dl,
2108  MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2109  Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2110  }
2111  return Val;
2112 }
2113 
2114 SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2115  MVT LocVT, MVT ValVT,
2116  SDValue Val) const {
2117  if (Subtarget->hasFullFP16()) {
2118  Val = DAG.getNode(ARMISD::VMOVrh, dl,
2119  MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2120  } else {
2121  Val = DAG.getNode(ISD::BITCAST, dl,
2122  MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2123  Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2124  MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2125  }
2126  return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2127 }
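// Editorial sketch (not part of the source): for an f16 value that lives in a
// 32-bit location (e.g. returned in s0 on a hard-float target), MoveToHPR and
// MoveFromHPR produce roughly the following DAG shapes, assuming LocVT = f32
// and ValVT = f16:
//
//   with +fullfp16:    f16 = ARMISD::VMOVhr (i32 (bitcast f32))
//   without fullfp16:  f16 = bitcast (i16 (truncate (i32 (bitcast f32))))
//
// i.e. the half value always travels in the low 16 bits of the 32-bit location.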
2128 
2129 /// LowerCallResult - Lower the result values of a call into the
2130 /// appropriate copies out of appropriate physical registers.
2131 SDValue ARMTargetLowering::LowerCallResult(
2132  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2133  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2134  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2135  SDValue ThisVal) const {
2136  // Assign locations to each value returned by this call.
2137  SmallVector<CCValAssign, 16> RVLocs;
2138  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2139  *DAG.getContext());
2140  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2141 
2142  // Copy all of the result registers out of their specified physreg.
2143  for (unsigned i = 0; i != RVLocs.size(); ++i) {
2144  CCValAssign VA = RVLocs[i];
2145 
2146  // Pass 'this' value directly from the argument to return value, to avoid
2147  // reg unit interference
2148  if (i == 0 && isThisReturn) {
2149  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2150  "unexpected return calling convention register assignment");
2151  InVals.push_back(ThisVal);
2152  continue;
2153  }
2154 
2155  SDValue Val;
2156  if (VA.needsCustom() &&
2157  (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2158  // Handle f64 or half of a v2f64.
2159  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2160  InFlag);
2161  Chain = Lo.getValue(1);
2162  InFlag = Lo.getValue(2);
2163  VA = RVLocs[++i]; // skip ahead to next loc
2164  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2165  InFlag);
2166  Chain = Hi.getValue(1);
2167  InFlag = Hi.getValue(2);
2168  if (!Subtarget->isLittle())
2169  std::swap (Lo, Hi);
2170  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2171 
2172  if (VA.getLocVT() == MVT::v2f64) {
2173  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2174  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2175  DAG.getConstant(0, dl, MVT::i32));
2176 
2177  VA = RVLocs[++i]; // skip ahead to next loc
2178  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2179  Chain = Lo.getValue(1);
2180  InFlag = Lo.getValue(2);
2181  VA = RVLocs[++i]; // skip ahead to next loc
2182  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2183  Chain = Hi.getValue(1);
2184  InFlag = Hi.getValue(2);
2185  if (!Subtarget->isLittle())
2186  std::swap (Lo, Hi);
2187  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2188  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2189  DAG.getConstant(1, dl, MVT::i32));
2190  }
2191  } else {
2192  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2193  InFlag);
2194  Chain = Val.getValue(1);
2195  InFlag = Val.getValue(2);
2196  }
2197 
2198  switch (VA.getLocInfo()) {
2199  default: llvm_unreachable("Unknown loc info!");
2200  case CCValAssign::Full: break;
2201  case CCValAssign::BCvt:
2202  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2203  break;
2204  }
2205 
2206  // f16 arguments have their size extended to 4 bytes and passed as if they
2207  // had been copied to the LSBs of a 32-bit register.
2208  // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2209  if (VA.needsCustom() &&
2210  (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2211  Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2212 
2213  InVals.push_back(Val);
2214  }
2215 
2216  return Chain;
2217 }
2218 
2219 std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2220  const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
2221  bool IsTailCall, int SPDiff) const {
2222  SDValue DstAddr;
2223  MachinePointerInfo DstInfo;
2224  int32_t Offset = VA.getLocMemOffset();
2225  MachineFunction &MF = DAG.getMachineFunction();
2226 
2227  if (IsTailCall) {
2228  Offset += SPDiff;
2229  auto PtrVT = getPointerTy(DAG.getDataLayout());
2230  int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2231  int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2232  DstAddr = DAG.getFrameIndex(FI, PtrVT);
2233  DstInfo =
2234  MachinePointerInfo::getFixedStack(MF, FI);
2235  } else {
2236  SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
2237  DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2238  StackPtr, PtrOff);
2239  DstInfo =
2240  MachinePointerInfo::getStack(MF, Offset);
2241  }
2242 
2243  return std::make_pair(DstAddr, DstInfo);
2244 }
2245 
2246 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2247  SDValue Chain, SDValue &Arg,
2248  RegsToPassVector &RegsToPass,
2249  CCValAssign &VA, CCValAssign &NextVA,
2250  SDValue &StackPtr,
2251  SmallVectorImpl<SDValue> &MemOpChains,
2252  bool IsTailCall,
2253  int SPDiff) const {
2254  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2255  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2256  unsigned id = Subtarget->isLittle() ? 0 : 1;
2257  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2258 
2259  if (NextVA.isRegLoc())
2260  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2261  else {
2262  assert(NextVA.isMemLoc());
2263  if (!StackPtr.getNode())
2264  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2265  getPointerTy(DAG.getDataLayout()));
2266 
2267  SDValue DstAddr;
2268  MachinePointerInfo DstInfo;
2269  std::tie(DstAddr, DstInfo) =
2270  computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2271  MemOpChains.push_back(
2272  DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2273  }
2274 }
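// Editorial sketch of the effect of PassF64ArgInRegs (not part of the source):
// an f64 argument assigned to, say, r0/r1 is split with ARMISD::VMOVRRD, i.e.
//
//   f64 %a  -->  (i32 lo, i32 hi) = VMOVRRD %a
//                r0 <- lo, r1 <- hi    (little-endian)
//                r0 <- hi, r1 <- lo    (big-endian)
//
// and if only the first half got a register, the other half is stored to the
// stack slot computed by computeAddrForCallArg above.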
2275 
2276 static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2277  return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2278  CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
2279 }
2280 
2281 /// LowerCall - Lowering a call into a callseq_start <-
2282 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
2283 /// nodes.
2284 SDValue
2285 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2286  SmallVectorImpl<SDValue> &InVals) const {
2287  SelectionDAG &DAG = CLI.DAG;
2288  SDLoc &dl = CLI.DL;
2289  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2290  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2291  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2292  SDValue Chain = CLI.Chain;
2293  SDValue Callee = CLI.Callee;
2294  bool &isTailCall = CLI.IsTailCall;
2295  CallingConv::ID CallConv = CLI.CallConv;
2296  bool doesNotRet = CLI.DoesNotReturn;
2297  bool isVarArg = CLI.IsVarArg;
2298 
2299  MachineFunction &MF = DAG.getMachineFunction();
2300  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2301  MachineFunction::CallSiteInfo CSInfo;
2302  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2303  bool isThisReturn = false;
2304  bool isCmseNSCall = false;
2305  bool isSibCall = false;
2306  bool PreferIndirect = false;
2307 
2308  // Determine whether this is a non-secure function call.
2309  if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2310  isCmseNSCall = true;
2311 
2312  // Disable tail calls if they're not supported.
2313  if (!Subtarget->supportsTailCall())
2314  isTailCall = false;
2315 
2316  // For both the non-secure calls and the returns from a CMSE entry function,
2317  // the function needs to do some extra work after the call, or before the
2318  // return, respectively, thus it cannot end with a tail call
2319  if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2320  isTailCall = false;
2321 
2322  if (isa<GlobalAddressSDNode>(Callee)) {
2323  // If we're optimizing for minimum size and the function is called three or
2324  // more times in this block, we can improve codesize by calling indirectly
2325  // as BLXr has a 16-bit encoding.
2326  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2327  if (CLI.CB) {
2328  auto *BB = CLI.CB->getParent();
2329  PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2330  count_if(GV->users(), [&BB](const User *U) {
2331  return isa<Instruction>(U) &&
2332  cast<Instruction>(U)->getParent() == BB;
2333  }) > 2;
2334  }
2335  }
2336  if (isTailCall) {
2337  // Check if it's really possible to do a tail call.
2338  isTailCall = IsEligibleForTailCallOptimization(
2339  Callee, CallConv, isVarArg, isStructRet,
2340  MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2341  PreferIndirect);
2342 
2343  if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2344  CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2345  isSibCall = true;
2346 
2347  // We don't support GuaranteedTailCallOpt for ARM, only automatically
2348  // detected sibcalls.
2349  if (isTailCall)
2350  ++NumTailCalls;
2351  }
2352 
2353  if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2354  report_fatal_error("failed to perform tail call elimination on a call "
2355  "site marked musttail");
2356  // Analyze operands of the call, assigning locations to each operand.
2357  SmallVector<CCValAssign, 16> ArgLocs;
2358  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2359  *DAG.getContext());
2360  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2361 
2362  // Get a count of how many bytes are to be pushed on the stack.
2363  unsigned NumBytes = CCInfo.getNextStackOffset();
2364 
2365  // SPDiff is the byte offset of the call's argument area from the callee's.
2366  // Stores to callee stack arguments will be placed in FixedStackSlots offset
2367  // by this amount for a tail call. In a sibling call it must be 0 because the
2368  // caller will deallocate the entire stack and the callee still expects its
2369  // arguments to begin at SP+0. Completely unused for non-tail calls.
2370  int SPDiff = 0;
2371 
2372  if (isTailCall && !isSibCall) {
2373  auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2374  unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2375 
2376  // Since callee will pop argument stack as a tail call, we must keep the
2377  // popped size 16-byte aligned.
2378  Align StackAlign = DAG.getDataLayout().getStackAlignment();
2379  NumBytes = alignTo(NumBytes, StackAlign);
2380 
2381  // SPDiff will be negative if this tail call requires more space than we
2382  // would automatically have in our incoming argument space. Positive if we
2383  // can actually shrink the stack.
2384  SPDiff = NumReusableBytes - NumBytes;
2385 
2386  // If this call requires more stack than we have available from
2387  // LowerFormalArguments, tell FrameLowering to reserve space for it.
2388  if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2389  AFI->setArgRegsSaveSize(-SPDiff);
2390  }
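  // Editorial example (hypothetical numbers, not part of the source): if the
  // caller was entered with 8 bytes of stack arguments (NumReusableBytes = 8)
  // and this tail call needs NumBytes = 32 bytes (already 16-byte aligned),
  // then SPDiff = 8 - 32 = -24, so FrameLowering is asked to reserve 24 extra
  // bytes via setArgRegsSaveSize so the outgoing arguments fit below SP.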
2391 
2392  if (isSibCall) {
2393  // For sibling tail calls, memory operands are available in our caller's stack.
2394  NumBytes = 0;
2395  } else {
2396  // Adjust the stack pointer for the new arguments...
2397  // These operations are automatically eliminated by the prolog/epilog pass
2398  Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2399  }
2400 
2401  SDValue StackPtr =
2402  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2403 
2404  RegsToPassVector RegsToPass;
2405  SmallVector<SDValue, 8> MemOpChains;
2406 
2407  // During a tail call, stores to the argument area must happen after all of
2408  // the function's incoming arguments have been loaded because they may alias.
2409  // This is done by folding in a TokenFactor from LowerFormalArguments, but
2410  // there's no point in doing so repeatedly so this tracks whether that's
2411  // happened yet.
2412  bool AfterFormalArgLoads = false;
2413 
2414  // Walk the register/memloc assignments, inserting copies/loads. In the case
2415  // of tail call optimization, arguments are handled later.
2416  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2417  i != e;
2418  ++i, ++realArgIdx) {
2419  CCValAssign &VA = ArgLocs[i];
2420  SDValue Arg = OutVals[realArgIdx];
2421  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2422  bool isByVal = Flags.isByVal();
2423 
2424  // Promote the value if needed.
2425  switch (VA.getLocInfo()) {
2426  default: llvm_unreachable("Unknown loc info!");
2427  case CCValAssign::Full: break;
2428  case CCValAssign::SExt:
2429  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2430  break;
2431  case CCValAssign::ZExt:
2432  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2433  break;
2434  case CCValAssign::AExt:
2435  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2436  break;
2437  case CCValAssign::BCvt:
2438  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2439  break;
2440  }
2441 
2442  if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2443  Chain = DAG.getStackArgumentTokenFactor(Chain);
2444  AfterFormalArgLoads = true;
2445  }
2446 
2447  // f16 arguments have their size extended to 4 bytes and passed as if they
2448  // had been copied to the LSBs of a 32-bit register.
2449  // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2450  if (VA.needsCustom() &&
2451  (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2452  Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2453  } else {
2454  // f16 arguments could have been extended prior to argument lowering.
2455  // Mask such arguments if this is a CMSE nonsecure call.
2456  auto ArgVT = Outs[realArgIdx].ArgVT;
2457  if (isCmseNSCall && (ArgVT == MVT::f16)) {
2458  auto LocBits = VA.getLocVT().getSizeInBits();
2459  auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2460  SDValue Mask =
2461  DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2462  Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2463  Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2464  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2465  }
2466  }
2467 
2468  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2469  if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2470  SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2471  DAG.getConstant(0, dl, MVT::i32));
2472  SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2473  DAG.getConstant(1, dl, MVT::i32));
2474 
2475  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2476  StackPtr, MemOpChains, isTailCall, SPDiff);
2477 
2478  VA = ArgLocs[++i]; // skip ahead to next loc
2479  if (VA.isRegLoc()) {
2480  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2481  StackPtr, MemOpChains, isTailCall, SPDiff);
2482  } else {
2483  assert(VA.isMemLoc());
2484  SDValue DstAddr;
2485  MachinePointerInfo DstInfo;
2486  std::tie(DstAddr, DstInfo) =
2487  computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2488  MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2489  }
2490  } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2491  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2492  StackPtr, MemOpChains, isTailCall, SPDiff);
2493  } else if (VA.isRegLoc()) {
2494  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2495  Outs[0].VT == MVT::i32) {
2496  assert(VA.getLocVT() == MVT::i32 &&
2497  "unexpected calling convention register assignment");
2498  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2499  "unexpected use of 'returned'");
2500  isThisReturn = true;
2501  }
2502  const TargetOptions &Options = DAG.getTarget().Options;
2503  if (Options.EmitCallSiteInfo)
2504  CSInfo.emplace_back(VA.getLocReg(), i);
2505  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2506  } else if (isByVal) {
2507  assert(VA.isMemLoc());
2508  unsigned offset = 0;
2509 
2510  // True if this byval aggregate will be split between registers
2511  // and memory.
2512  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2513  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2514 
2515  if (CurByValIdx < ByValArgsCount) {
2516 
2517  unsigned RegBegin, RegEnd;
2518  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2519 
2520  EVT PtrVT =
2521  DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2522  unsigned int i, j;
2523  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2524  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2525  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2526  SDValue Load =
2527  DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2528  DAG.InferPtrAlign(AddArg));
2529  MemOpChains.push_back(Load.getValue(1));
2530  RegsToPass.push_back(std::make_pair(j, Load));
2531  }
2532 
2533  // If the parameter size exceeds the register area, the "offset" value
2534  // helps us calculate the stack slot for the remaining part properly.
2535  offset = RegEnd - RegBegin;
2536 
2537  CCInfo.nextInRegsParam();
2538  }
2539 
2540  if (Flags.getByValSize() > 4*offset) {
2541  auto PtrVT = getPointerTy(DAG.getDataLayout());
2542  SDValue Dst;
2543  MachinePointerInfo DstInfo;
2544  std::tie(Dst, DstInfo) =
2545  computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2546  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2547  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2548  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2549  MVT::i32);
2550  SDValue AlignNode =
2551  DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2552 
2553  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2554  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2555  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2556  Ops));
2557  }
2558  } else {
2559  assert(VA.isMemLoc());
2560  SDValue DstAddr;
2561  MachinePointerInfo DstInfo;
2562  std::tie(DstAddr, DstInfo) =
2563  computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2564 
2565  SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2566  MemOpChains.push_back(Store);
2567  }
2568  }
2569 
2570  if (!MemOpChains.empty())
2571  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2572 
2573  // Build a sequence of copy-to-reg nodes chained together with token chain
2574  // and flag operands which copy the outgoing args into the appropriate regs.
2575  SDValue InFlag;
2576  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2577  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2578  RegsToPass[i].second, InFlag);
2579  InFlag = Chain.getValue(1);
2580  }
2581 
2582  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2583  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2584  // node so that legalize doesn't hack it.
2585  bool isDirect = false;
2586 
2587  const TargetMachine &TM = getTargetMachine();
2588  const Module *Mod = MF.getFunction().getParent();
2589  const GlobalValue *GV = nullptr;
2590  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2591  GV = G->getGlobal();
2592  bool isStub =
2593  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2594 
2595  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2596  bool isLocalARMFunc = false;
2597  auto PtrVt = getPointerTy(DAG.getDataLayout());
2598 
2599  if (Subtarget->genLongCalls()) {
2600  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2601  "long-calls codegen is not position independent!");
2602  // Handle a global address or an external symbol. If it's not one of
2603  // those, the target's already in a register, so we don't need to do
2604  // anything extra.
2605  if (isa<GlobalAddressSDNode>(Callee)) {
2606  // Create a constant pool entry for the callee address
2607  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2608  ARMConstantPoolValue *CPV =
2609  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2610 
2611  // Get the address of the callee into a register
2612  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2613  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2614  Callee = DAG.getLoad(
2615  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2616  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2617  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2618  const char *Sym = S->getSymbol();
2619 
2620  // Create a constant pool entry for the callee address
2621  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2622  ARMConstantPoolValue *CPV =
2623  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2624  ARMPCLabelIndex, 0);
2625  // Get the address of the callee into a register
2626  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2627  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2628  Callee = DAG.getLoad(
2629  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2630  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2631  }
2632  } else if (isa<GlobalAddressSDNode>(Callee)) {
2633  if (!PreferIndirect) {
2634  isDirect = true;
2635  bool isDef = GV->isStrongDefinitionForLinker();
2636 
2637  // ARM call to a local ARM function is predicable.
2638  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2639  // tBX takes a register source operand.
2640  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2641  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2642  Callee = DAG.getNode(
2643  ARMISD::WrapperPIC, dl, PtrVt,
2644  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2645  Callee = DAG.getLoad(
2646  PtrVt, dl, DAG.getEntryNode(), Callee,
2650  } else if (Subtarget->isTargetCOFF()) {
2651  assert(Subtarget->isTargetWindows() &&
2652  "Windows is the only supported COFF target");
2653  unsigned TargetFlags = ARMII::MO_NO_FLAG;
2654  if (GV->hasDLLImportStorageClass())
2655  TargetFlags = ARMII::MO_DLLIMPORT;
2656  else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
2657  TargetFlags = ARMII::MO_COFFSTUB;
2658  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
2659  TargetFlags);
2660  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2661  Callee =
2662  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2663  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2664  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2665  } else {
2666  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2667  }
2668  }
2669  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2670  isDirect = true;
2671  // tBX takes a register source operand.
2672  const char *Sym = S->getSymbol();
2673  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2674  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2675  ARMConstantPoolValue *CPV =
2676  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2677  ARMPCLabelIndex, 4);
2678  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2679  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2680  Callee = DAG.getLoad(
2681  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2682  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2683  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2684  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2685  } else {
2686  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2687  }
2688  }
2689 
2690  if (isCmseNSCall) {
2691  assert(!isARMFunc && !isDirect &&
2692  "Cannot handle call to ARM function or direct call");
2693  if (NumBytes > 0) {
2694  DiagnosticInfoUnsupported Diag(DAG.getMachineFunction().getFunction(),
2695  "call to non-secure function would "
2696  "require passing arguments on stack",
2697  dl.getDebugLoc());
2698  DAG.getContext()->diagnose(Diag);
2699  }
2700  if (isStructRet) {
2701  DiagnosticInfoUnsupported Diag(
2702  DAG.getMachineFunction().getFunction(),
2703  "call to non-secure function would return value through pointer",
2704  dl.getDebugLoc());
2705  DAG.getContext()->diagnose(Diag);
2706  }
2707  }
2708 
2709  // FIXME: handle tail calls differently.
2710  unsigned CallOpc;
2711  if (Subtarget->isThumb()) {
2712  if (isCmseNSCall)
2713  CallOpc = ARMISD::tSECALL;
2714  else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2715  CallOpc = ARMISD::CALL_NOLINK;
2716  else
2717  CallOpc = ARMISD::CALL;
2718  } else {
2719  if (!isDirect && !Subtarget->hasV5TOps())
2720  CallOpc = ARMISD::CALL_NOLINK;
2721  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2722  // Emit regular call when code size is the priority
2723  !Subtarget->hasMinSize())
2724  // "mov lr, pc; b _foo" to avoid confusing the RSP
2725  CallOpc = ARMISD::CALL_NOLINK;
2726  else
2727  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2728  }
2729 
2730  // We don't usually want to end the call-sequence here because we would tidy
2731  // the frame up *after* the call, however in the ABI-changing tail-call case
2732  // we've carefully laid out the parameters so that when sp is reset they'll be
2733  // in the correct location.
2734  if (isTailCall && !isSibCall) {
2735  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
2736  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2737  InFlag = Chain.getValue(1);
2738  }
2739 
2740  std::vector<SDValue> Ops;
2741  Ops.push_back(Chain);
2742  Ops.push_back(Callee);
2743 
2744  if (isTailCall) {
2745  Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32));
2746  }
2747 
2748  // Add argument registers to the end of the list so that they are known live
2749  // into the call.
2750  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2751  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2752  RegsToPass[i].second.getValueType()));
2753 
2754  // Add a register mask operand representing the call-preserved registers.
2755  if (!isTailCall) {
2756  const uint32_t *Mask;
2757  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2758  if (isThisReturn) {
2759  // For 'this' returns, use the R0-preserving mask if applicable
2760  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2761  if (!Mask) {
2762  // Set isThisReturn to false if the calling convention is not one that
2763  // allows 'returned' to be modeled in this way, so LowerCallResult does
2764  // not try to pass 'this' straight through
2765  isThisReturn = false;
2766  Mask = ARI->getCallPreservedMask(MF, CallConv);
2767  }
2768  } else
2769  Mask = ARI->getCallPreservedMask(MF, CallConv);
2770 
2771  assert(Mask && "Missing call preserved mask for calling convention");
2772  Ops.push_back(DAG.getRegisterMask(Mask));
2773  }
2774 
2775  if (InFlag.getNode())
2776  Ops.push_back(InFlag);
2777 
2778  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2779  if (isTailCall) {
2780  MF.getFrameInfo().setHasTailCall();
2781  SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2782  DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2783  return Ret;
2784  }
2785 
2786  // Returns a chain and a flag for retval copy to use.
2787  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2788  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2789  InFlag = Chain.getValue(1);
2790  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2791 
2792  // If we're guaranteeing tail-calls will be honoured, the callee must
2793  // pop its own argument stack on return. But this call is *not* a tail call so
2794  // we need to undo that after it returns to restore the status-quo.
2795  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
2796  uint64_t CalleePopBytes =
2797  canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;
2798 
2799  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2800  DAG.getIntPtrConstant(CalleePopBytes, dl, true),
2801  InFlag, dl);
2802  if (!Ins.empty())
2803  InFlag = Chain.getValue(1);
2804 
2805  // Handle result values, copying them out of physregs into vregs that we
2806  // return.
2807  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2808  InVals, isThisReturn,
2809  isThisReturn ? OutVals[0] : SDValue());
2810 }
2811 
2812 /// HandleByVal - Every parameter *after* a byval parameter is passed
2813 /// on the stack. Remember the next parameter register to allocate,
2814 /// and then confiscate the rest of the parameter registers to ensure
2815 /// this.
2816 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2817  Align Alignment) const {
2818  // Byval (as with any stack) slots are always at least 4 byte aligned.
2819  Alignment = std::max(Alignment, Align(4));
2820 
2821  unsigned Reg = State->AllocateReg(GPRArgRegs);
2822  if (!Reg)
2823  return;
2824 
2825  unsigned AlignInRegs = Alignment.value() / 4;
2826  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2827  for (unsigned i = 0; i < Waste; ++i)
2828  Reg = State->AllocateReg(GPRArgRegs);
2829 
2830  if (!Reg)
2831  return;
2832 
2833  unsigned Excess = 4 * (ARM::R4 - Reg);
2834 
2835  // Special case when NSAA != SP and the parameter size is greater than the
2836  // size of all remaining GPR regs. In that case we can't split the parameter;
2837  // we must send it to the stack. We also must set NCRN to R4, so we waste all
2838  // remaining registers.
2839  const unsigned NSAAOffset = State->getNextStackOffset();
2840  if (NSAAOffset != 0 && Size > Excess) {
2841  while (State->AllocateReg(GPRArgRegs))
2842  ;
2843  return;
2844  }
2845 
2846  // The first register for the byval parameter is the first register that
2847  // wasn't allocated before this method call, so it would be "reg".
2848  // If the parameter is small enough to be saved in the range [reg, r4), then
2849  // the end (first after last) register would be reg + param-size-in-regs;
2850  // otherwise the parameter would be split between registers and stack, and
2851  // the end register would be r4 in this case.
2852  unsigned ByValRegBegin = Reg;
2853  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2854  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2855  // Note that the first register was already allocated at the beginning of
2856  // this method, so here we allocate the remaining registers we need.
2857  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2858  State->AllocateReg(GPRArgRegs);
2859  // A byval parameter that is split between registers and memory needs its
2860  // size truncated here.
2861  // In the case where the entire structure fits in registers, we set the
2862  // size in memory to zero.
2863  Size = std::max<int>(Size - Excess, 0);
2864 }
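// Editorial example (hypothetical values, not part of the source): a 20-byte,
// 4-byte-aligned byval argument arriving when r1 is the next free GPR gets
// r1-r3 (Excess = 4 * (R4 - R1) = 12 bytes) and, provided it is allowed to be
// split, ByValRegEnd = r4, so the remaining Size reported back to the caller
// is max(20 - 12, 0) = 8 bytes, which are passed on the stack.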
2865 
2866 /// MatchingStackOffset - Return true if the given stack call argument is
2867 /// already available in the same position (relatively) of the caller's
2868 /// incoming argument stack.
2869 static
2870 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2871  MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2872  const TargetInstrInfo *TII) {
2873  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2874  int FI = std::numeric_limits<int>::max();
2875  if (Arg.getOpcode() == ISD::CopyFromReg) {
2876  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2877  if (!Register::isVirtualRegister(VR))
2878  return false;
2879  MachineInstr *Def = MRI->getVRegDef(VR);
2880  if (!Def)
2881  return false;
2882  if (!Flags.isByVal()) {
2883  if (!TII->isLoadFromStackSlot(*Def, FI))
2884  return false;
2885  } else {
2886  return false;
2887  }
2888  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2889  if (Flags.isByVal())
2890  // ByVal argument is passed in as a pointer but it's now being
2891  // dereferenced. e.g.
2892  // define @foo(%struct.X* %A) {
2893  // tail call @bar(%struct.X* byval %A)
2894  // }
2895  return false;
2896  SDValue Ptr = Ld->getBasePtr();
2897  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2898  if (!FINode)
2899  return false;
2900  FI = FINode->getIndex();
2901  } else
2902  return false;
2903 
2904  assert(FI != std::numeric_limits<int>::max());
2905  if (!MFI.isFixedObjectIndex(FI))
2906  return false;
2907  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2908 }
2909 
2910 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2911 /// for tail call optimization. Targets which want to do tail call
2912 /// optimization should implement this function.
2913 bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2914  SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2915  bool isCalleeStructRet, bool isCallerStructRet,
2916  const SmallVectorImpl<ISD::OutputArg> &Outs,
2917  const SmallVectorImpl<SDValue> &OutVals,
2918  const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
2919  const bool isIndirect) const {
2920  MachineFunction &MF = DAG.getMachineFunction();
2921  const Function &CallerF = MF.getFunction();
2922  CallingConv::ID CallerCC = CallerF.getCallingConv();
2923 
2924  assert(Subtarget->supportsTailCall());
2925 
2926  // Indirect tail calls cannot be optimized for Thumb1 if the args
2927  // to the call take up r0-r3. The reason is that there are no legal registers
2928  // left to hold the pointer to the function to be called.
2929  if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2930  (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
2931  return false;
2932 
2933  // Look for obvious safe cases to perform tail call optimization that do not
2934  // require ABI changes. This is what gcc calls sibcall.
2935 
2936  // Exception-handling functions need a special set of instructions to indicate
2937  // a return to the hardware. Tail-calling another function would probably
2938  // break this.
2939  if (CallerF.hasFnAttribute("interrupt"))
2940  return false;
2941 
2942  if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
2943  return CalleeCC == CallerCC;
2944 
2945  // Also avoid sibcall optimization if either caller or callee uses struct
2946  // return semantics.
2947  if (isCalleeStructRet || isCallerStructRet)
2948  return false;
2949 
2950  // Externally-defined functions with weak linkage should not be
2951  // tail-called on ARM when the OS does not support dynamic
2952  // pre-emption of symbols, as the AAELF spec requires normal calls
2953  // to undefined weak functions to be replaced with a NOP or jump to the
2954  // next instruction. The behaviour of branch instructions in this
2955  // situation (as used for tail calls) is implementation-defined, so we
2956  // cannot rely on the linker replacing the tail call with a return.
2957  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2958  const GlobalValue *GV = G->getGlobal();
2959  const Triple &TT = getTargetMachine().getTargetTriple();
2960  if (GV->hasExternalWeakLinkage() &&
2961  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2962  return false;
2963  }
2964 
2965  // Check that the call results are passed in the same way.
2966  LLVMContext &C = *DAG.getContext();
2967  if (!CCState::resultsCompatible(
2968  getEffectiveCallingConv(CalleeCC, isVarArg),
2969  getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
2970  CCAssignFnForReturn(CalleeCC, isVarArg),
2971  CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
2972  return false;
2973  // The callee has to preserve all registers the caller needs to preserve.
2974  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2975  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2976  if (CalleeCC != CallerCC) {
2977  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2978  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2979  return false;
2980  }
2981 
2982  // If Caller's vararg or byval argument has been split between registers and
2983  // stack, do not perform tail call, since part of the argument is in caller's
2984  // local frame.
2985  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2986  if (AFI_Caller->getArgRegsSaveSize())
2987  return false;
2988 
2989  // If the callee takes no arguments then go on to check the results of the
2990  // call.
2991  if (!Outs.empty()) {
2992  // Check if stack adjustment is needed. For now, do not do this if any
2993  // argument is passed on the stack.
2994  SmallVector<CCValAssign, 16> ArgLocs;
2995  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2996  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2997  if (CCInfo.getNextStackOffset()) {
2998  // Check if the arguments are already laid out in the right way as
2999  // the caller's fixed stack objects.
3000  MachineFrameInfo &MFI = MF.getFrameInfo();
3001  const MachineRegisterInfo *MRI = &MF.getRegInfo();
3002  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3003  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
3004  i != e;
3005  ++i, ++realArgIdx) {
3006  CCValAssign &VA = ArgLocs[i];
3007  EVT RegVT = VA.getLocVT();
3008  SDValue Arg = OutVals[realArgIdx];
3009  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3010  if (VA.getLocInfo() == CCValAssign::Indirect)
3011  return false;
3012  if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
3013  // f64 and vector types are split into multiple registers or
3014  // register/stack-slot combinations. The types will not match
3015  // the registers; give up on memory f64 refs until we figure
3016  // out what to do about this.
3017  if (!VA.isRegLoc())
3018  return false;
3019  if (!ArgLocs[++i].isRegLoc())
3020  return false;
3021  if (RegVT == MVT::v2f64) {
3022  if (!ArgLocs[++i].isRegLoc())
3023  return false;
3024  if (!ArgLocs[++i].isRegLoc())
3025  return false;
3026  }
3027  } else if (!VA.isRegLoc()) {
3028  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
3029  MFI, MRI, TII))
3030  return false;
3031  }
3032  }
3033  }
3034 
3035  const MachineRegisterInfo &MRI = MF.getRegInfo();
3036  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3037  return false;
3038  }
3039 
3040  return true;
3041 }
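// Editorial note (not part of the source): a call that typically passes the
// checks above and becomes a sibcall looks like
//
//   define i32 @caller(i32 %x) {
//     %r = tail call i32 @callee(i32 %x)
//     ret i32 %r
//   }
//
// where caller and callee share an effective calling convention, neither uses
// sret, no argument spills to the stack, and the callee clobbers no register
// the caller must preserve, so the call can lower to a plain "b callee".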
3042 
3043 bool
3044 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3045  MachineFunction &MF, bool isVarArg,
3046  const SmallVectorImpl<ISD::OutputArg> &Outs,
3047  LLVMContext &Context) const {
3048  SmallVector<CCValAssign, 16> RVLocs;
3049  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3050  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3051 }
3052 
3053 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
3054  const SDLoc &DL, SelectionDAG &DAG) {
3055  const MachineFunction &MF = DAG.getMachineFunction();
3056  const Function &F = MF.getFunction();
3057 
3058  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3059 
3060  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3061  // version of the "preferred return address". These offsets affect the return
3062  // instruction if this is a return from PL1 without hypervisor extensions.
3063  // IRQ/FIQ: +4 "subs pc, lr, #4"
3064  // SWI: 0 "subs pc, lr, #0"
3065  // ABORT: +4 "subs pc, lr, #4"
3066  // UNDEF: +4/+2 "subs pc, lr, #0"
3067  // UNDEF varies depending on whether the exception came from ARM or Thumb
3068  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
3069 
3070  int64_t LROffset;
3071  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3072  IntKind == "ABORT")
3073  LROffset = 4;
3074  else if (IntKind == "SWI" || IntKind == "UNDEF")
3075  LROffset = 0;
3076  else
3077  report_fatal_error("Unsupported interrupt attribute. If present, value "
3078  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3079 
3080  RetOps.insert(RetOps.begin() + 1,
3081  DAG.getConstant(LROffset, DL, MVT::i32, false));
3082 
3083  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
3084 }
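// Editorial example (not part of the source): for a handler such as
//
//   __attribute__((interrupt("IRQ"))) void isr(void) { ... }
//
// LROffset above is 4, so the function returns with "subs pc, lr, #4",
// restoring both the PC and the pre-exception CPSR in one instruction.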
3085 
3086 SDValue
3087 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3088  bool isVarArg,
3089  const SmallVectorImpl<ISD::OutputArg> &Outs,
3090  const SmallVectorImpl<SDValue> &OutVals,
3091  const SDLoc &dl, SelectionDAG &DAG) const {
3092  // CCValAssign - represent the assignment of the return value to a location.
3093  SmallVector<CCValAssign, 16> RVLocs;
3094 
3095  // CCState - Info about the registers and stack slots.
3096  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3097  *DAG.getContext());
3098 
3099  // Analyze outgoing return values.
3100  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3101 
3102  SDValue Flag;
3103  SmallVector<SDValue, 4> RetOps;
3104  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3105  bool isLittleEndian = Subtarget->isLittle();
3106 
3107  MachineFunction &MF = DAG.getMachineFunction();
3108  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3109  AFI->setReturnRegsCount(RVLocs.size());
3110 
3111  // Report error if cmse entry function returns structure through first ptr arg.
3112  if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3113  // Note: using an empty SDLoc(), as the first line of the function is a
3114  // better place to report than the last line.
3115  DiagnosticInfoUnsupported Diag(
3116  DAG.getMachineFunction().getFunction(),
3117  "secure entry function would return value through pointer",
3118  SDLoc().getDebugLoc());
3119  DAG.getContext()->diagnose(Diag);
3120  }
3121 
3122  // Copy the result values into the output registers.
3123  for (unsigned i = 0, realRVLocIdx = 0;
3124  i != RVLocs.size();
3125  ++i, ++realRVLocIdx) {
3126  CCValAssign &VA = RVLocs[i];
3127  assert(VA.isRegLoc() && "Can only return in registers!");
3128 
3129  SDValue Arg = OutVals[realRVLocIdx];
3130  bool ReturnF16 = false;
3131 
3132  if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3133  // Half-precision return values can be returned like this:
3134  //
3135  // t11 f16 = fadd ...
3136  // t12: i16 = bitcast t11
3137  // t13: i32 = zero_extend t12
3138  // t14: f32 = bitcast t13 <~~~~~~~ Arg
3139  //
3140  // to avoid code generation for bitcasts, we simply set Arg to the node
3141  // that produces the f16 value, t11 in this case.
3142  //
3143  if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3144  SDValue ZE = Arg.getOperand(0);
3145  if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3146  SDValue BC = ZE.getOperand(0);
3147  if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3148  Arg = BC.getOperand(0);
3149  ReturnF16 = true;
3150  }
3151  }
3152  }
3153  }
3154 
3155  switch (VA.getLocInfo()) {
3156  default: llvm_unreachable("Unknown loc info!");
3157  case CCValAssign::Full: break;
3158  case CCValAssign::BCvt:
3159  if (!ReturnF16)
3160  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3161  break;
3162  }
3163 
3164  // Mask f16 arguments if this is a CMSE nonsecure entry.
3165  auto RetVT = Outs[realRVLocIdx].ArgVT;
3166  if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3167  if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3168  Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3169  } else {
3170  auto LocBits = VA.getLocVT().getSizeInBits();
3171  auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3172  SDValue Mask =
3173  DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3174  Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3175  Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3176  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3177  }
3178  }
3179 
3180  if (VA.needsCustom() &&
3181  (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3182  if (VA.getLocVT() == MVT::v2f64) {
3183  // Extract the first half and return it in two registers.
3184  SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3185  DAG.getConstant(0, dl, MVT::i32));
3186  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3187  DAG.getVTList(MVT::i32, MVT::i32), Half);
3188 
3189  Chain =
3190  DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3191  HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag);
3192  Flag = Chain.getValue(1);
3193  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3194  VA = RVLocs[++i]; // skip ahead to next loc
3195  Chain =
3196  DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3197  HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag);
3198  Flag = Chain.getValue(1);
3199  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3200  VA = RVLocs[++i]; // skip ahead to next loc
3201 
3202  // Extract the 2nd half and fall through to handle it as an f64 value.
3203  Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3204  DAG.getConstant(1, dl, MVT::i32));
3205  }
3206  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3207  // available.
3208  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3209  DAG.getVTList(MVT::i32, MVT::i32), Arg);
3210  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3211  fmrrd.getValue(isLittleEndian ? 0 : 1), Flag);
3212  Flag = Chain.getValue(1);
3213  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3214  VA = RVLocs[++i]; // skip ahead to next loc
3215  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3216  fmrrd.getValue(isLittleEndian ? 1 : 0), Flag);
3217  } else
3218  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
3219 
3220  // Guarantee that all emitted copies are
3221  // stuck together, avoiding something bad.
3222  Flag = Chain.getValue(1);
3223  RetOps.push_back(DAG.getRegister(
3224  VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3225  }
3226  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3227  const MCPhysReg *I =
3228  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3229  if (I) {
3230  for (; *I; ++I) {
3231  if (ARM::GPRRegClass.contains(*I))
3232  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3233  else if (ARM::DPRRegClass.contains(*I))
3234  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3235  else
3236  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3237  }
3238  }
3239 
3240  // Update chain and glue.
3241  RetOps[0] = Chain;
3242  if (Flag.getNode())
3243  RetOps.push_back(Flag);
3244 
3245  // CPUs which aren't M-class use a special sequence to return from
3246  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3247  // though we use "subs pc, lr, #N").
3248  //
3249  // M-class CPUs actually use a normal return sequence with a special
3250  // (hardware-provided) value in LR, so the normal code path works.
3251  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3252  !Subtarget->isMClass()) {
3253  if (Subtarget->isThumb1Only())
3254  report_fatal_error("interrupt attribute is not supported in Thumb1");
3255  return LowerInterruptReturn(RetOps, dl, DAG);
3256  }
3257 
3258  ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ?
3259  ARMISD::SERET_FLAG : ARMISD::RET_FLAG;
3260  return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3261 }
3262 
3263 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3264  if (N->getNumValues() != 1)
3265  return false;
3266  if (!N->hasNUsesOfValue(1, 0))
3267  return false;
3268 
3269  SDValue TCChain = Chain;
3270  SDNode *Copy = *N->use_begin();
3271  if (Copy->getOpcode() == ISD::CopyToReg) {
3272  // If the copy has a glue operand, we conservatively assume it isn't safe to
3273  // perform a tail call.
3274  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3275  return false;
3276  TCChain = Copy->getOperand(0);
3277  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3278  SDNode *VMov = Copy;
3279  // f64 returned in a pair of GPRs.
3280  SmallPtrSet<SDNode*, 2> Copies;
3281  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
3282  UI != UE; ++UI) {
3283  if (UI->getOpcode() != ISD::CopyToReg)
3284  return false;
3285  Copies.insert(*UI);
3286  }
3287  if (Copies.size() > 2)
3288  return false;
3289 
3290  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
3291  UI != UE; ++UI) {
3292  SDValue UseChain = UI->getOperand(0);
3293  if (Copies.count(UseChain.getNode()))
3294  // Second CopyToReg
3295  Copy = *UI;
3296  else {
3297  // We are at the top of this chain.
3298  // If the copy has a glue operand, we conservatively assume it
3299  // isn't safe to perform a tail call.
3300  if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
3301  return false;
3302  // First CopyToReg
3303  TCChain = UseChain;
3304  }
3305  }
3306  } else if (Copy->getOpcode() == ISD::BITCAST) {
3307  // f32 returned in a single GPR.
3308  if (!Copy->hasOneUse())
3309  return false;
3310  Copy = *Copy->use_begin();
3311  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3312  return false;
3313  // If the copy has a glue operand, we conservatively assume it isn't safe to
3314  // perform a tail call.
3315  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3316  return false;
3317  TCChain = Copy->getOperand(0);
3318  } else {
3319  return false;
3320  }
3321 
3322  bool HasRet = false;
3323  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
3324  UI != UE; ++UI) {
3325  if (UI->getOpcode() != ARMISD::RET_FLAG &&
3326  UI->getOpcode() != ARMISD::INTRET_FLAG)
3327  return false;
3328  HasRet = true;
3329  }
3330 
3331  if (!HasRet)
3332  return false;
3333 
3334  Chain = TCChain;
3335  return true;
3336 }
3337 
3338 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3339  if (!Subtarget->supportsTailCall())
3340  return false;
3341 
3342  if (!CI->isTailCall())
3343  return false;
3344 
3345  return true;
3346 }
3347 
3348 // Trying to write a 64-bit value, so we need to split it into two 32-bit values
3349 // first, and then pass the low and high parts through.
3350 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
3351  SDLoc DL(Op);
3352  SDValue WriteValue = Op->getOperand(2);
3353 
3354  // This function is only supposed to be called for i64 type argument.
3355  assert(WriteValue.getValueType() == MVT::i64
3356  && "LowerWRITE_REGISTER called for non-i64 type argument.");
3357 
3358  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3359  DAG.getConstant(0, DL, MVT::i32));
3360  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3361  DAG.getConstant(1, DL, MVT::i32));
3362  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3363  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3364 }
3365 
3366 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3367 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3368 // one of the above mentioned nodes. It has to be wrapped because otherwise
3369 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3370 // be used to form addressing mode. These wrapped nodes will be selected
3371 // into MOVi.
3372 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3373  SelectionDAG &DAG) const {
3374  EVT PtrVT = Op.getValueType();
3375  // FIXME there is no actual debug info here
3376  SDLoc dl(Op);
3377  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3378  SDValue Res;
3379 
3380  // When generating execute-only code Constant Pools must be promoted to the
3381  // global data section. It's a bit ugly that we can't share them across basic
3382  // blocks, but this way we guarantee that execute-only behaves correctly with
3383  // position-independent addressing modes.
3384  if (Subtarget->genExecuteOnly()) {
3385  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3386  auto T = const_cast<Type*>(CP->getType());
3387  auto C = const_cast<Constant*>(CP->getConstVal());
3388  auto M = const_cast<Module*>(DAG.getMachineFunction().
3389  getFunction().getParent());
3390  auto GV = new GlobalVariable(
3391  *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3392  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
3393  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
3394  Twine(AFI->createPICLabelUId())
3395  );
3396  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3397  dl, PtrVT);
3398  return LowerGlobalAddress(GA, DAG);
3399  }
3400 
3401  if (CP->isMachineConstantPoolEntry())
3402  Res =
3403  DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3404  else
3405  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign());
3406  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3407 }
3408 
3411 }
3412 
3413 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3414  SelectionDAG &DAG) const {
3415  MachineFunction &MF = DAG.getMachineFunction();
3416  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3417  unsigned ARMPCLabelIndex = 0;
3418  SDLoc DL(Op);
3419  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3420  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3421  SDValue CPAddr;
3422  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3423  if (!IsPositionIndependent) {
3424  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3425  } else {
3426  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3427  ARMPCLabelIndex = AFI->createPICLabelUId();
3428  ARMConstantPoolValue *CPV =
3429  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3430  ARMCP::CPBlockAddress, PCAdj);
3431  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3432  }
3433  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3434  SDValue Result = DAG.getLoad(
3435  PtrVT, DL, DAG.getEntryNode(), CPAddr,
3436  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3437  if (!IsPositionIndependent)
3438  return Result;
3439  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3440  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3441 }
3442 
3443 /// Convert a TLS address reference into the correct sequence of loads
3444 /// and calls to compute the variable's address for Darwin, and return an
3445 /// SDValue containing the final node.
3446 
3447 /// Darwin only has one TLS scheme which must be capable of dealing with the
3448 /// fully general situation, in the worst case. This means:
3449 /// + "extern __thread" declaration.
3450 /// + Defined in a possibly unknown dynamic library.
3451 ///
3452 /// The general system is that each __thread variable has a [3 x i32] descriptor
3453 /// which contains information used by the runtime to calculate the address. The
3454 /// only part of this the compiler needs to know about is the first word, which
3455 /// contains a function pointer that must be called with the address of the
3456 /// entire descriptor in "r0".
3457 ///
3458 /// Since this descriptor may be in a different unit, in general access must
3459 /// proceed along the usual ARM rules. A common sequence to produce is:
3460 ///
3461 /// movw rT1, :lower16:_var$non_lazy_ptr
3462 /// movt rT1, :upper16:_var$non_lazy_ptr
3463 /// ldr r0, [rT1]
3464 /// ldr rT2, [r0]
3465 /// blx rT2
3466 /// [...address now in r0...]
3467 SDValue
3468 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3469  SelectionDAG &DAG) const {
3470  assert(Subtarget->isTargetDarwin() &&
3471  "This function expects a Darwin target");
3472  SDLoc DL(Op);
3473 
3474  // The first step is to get the address of the actual global symbol. This is
3475  // the TLS descriptor lives.
3476  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3477 
3478  // The first entry in the descriptor is a function pointer that we must call
3479  // to obtain the address of the variable.
3480  SDValue Chain = DAG.getEntryNode();
3481  SDValue FuncTLVGet = DAG.getLoad(
3482  MVT::i32, DL, Chain, DescAddr,
3486  Chain = FuncTLVGet.getValue(1);
3487 
3488  MachineFunction &F = DAG.getMachineFunction();
3489  MachineFrameInfo &MFI = F.getFrameInfo();
3490  MFI.setAdjustsStack(true);
3491 
3492  // TLS calls preserve all registers except those that absolutely must be
3493  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3494  // silly).
3495  auto TRI =
3496  getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3497  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3498  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3499 
3500  // Finally, we can make the call. This is just a degenerate version of a
3501  // normal ARM call node: r0 takes the address of the descriptor, and
3502  // returns the address of the variable in this thread.
3503  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3504  Chain =
3505  DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3506  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3507  DAG.getRegisterMask(Mask), Chain.getValue(1));
3508  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3509 }
3510 
3511 SDValue
3512 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3513  SelectionDAG &DAG) const {
3514  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3515 
3516  SDValue Chain = DAG.getEntryNode();
3517  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3518  SDLoc DL(Op);
3519 
3520  // Load the current TEB (thread environment block)
3521  SDValue Ops[] = {Chain,
3522  DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3523  DAG.getTargetConstant(15, DL, MVT::i32),
3524  DAG.getTargetConstant(0, DL, MVT::i32),
3525  DAG.getTargetConstant(13, DL, MVT::i32),
3526  DAG.getTargetConstant(0, DL, MVT::i32),
3527  DAG.getTargetConstant(2, DL, MVT::i32)};
3528  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3529  DAG.getVTList(MVT::i32, MVT::Other), Ops);
3530 
3531  SDValue TEB = CurrentTEB.getValue(0);
3532  Chain = CurrentTEB.getValue(1);
3533 
3534  // Load the ThreadLocalStoragePointer from the TEB
3535  // A pointer to the TLS array is located at offset 0x2c from the TEB.
3536  SDValue TLSArray =
3537  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3538  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3539 
3540  // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3541  // offset into the TLSArray.
3542 
3543  // Load the TLS index from the C runtime
3544  SDValue TLSIndex =
3545  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3546  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3547  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3548 
3549  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3550  DAG.getConstant(2, DL, MVT::i32));
3551  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3552  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3553  MachinePointerInfo());
3554 
3555  // Get the offset of the start of the .tls section (section base)
3556  const auto *GA = cast<GlobalAddressSDNode>(Op);
3557  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3558  SDValue Offset = DAG.getLoad(
3559  PtrVT, DL, Chain,
3560  DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3561  DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3562  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3563 
3564  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3565 }
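// Editor's sketch (not part of the original file): the DAG built above amounts
// to the pointer arithmetic below. The 0x2c TEB offset and the _tls_index
// symbol come from the code above; the function name, parameters, and the use
// of plain char* arithmetic are illustrative assumptions.
extern "C" unsigned long _tls_index; // per-module TLS index provided by the CRT

static char *windowsTLSAddressSketch(char *TEB, unsigned SecRelOffset) {
  // ThreadLocalStoragePointer lives at TEB + 0x2c.
  char **TLSArray = *reinterpret_cast<char ***>(TEB + 0x2c);
  // Index the array by _tls_index (the "scaled by 4" step on a 32-bit target).
  char *TLSBase = TLSArray[_tls_index];
  // Add the variable's SECREL offset within this module's .tls section.
  return TLSBase + SecRelOffset;
}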
3566 
3567 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
3568 SDValue
3569 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3570  SelectionDAG &DAG) const {
3571  SDLoc dl(GA);
3572  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3573  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3574  MachineFunction &MF = DAG.getMachineFunction();
3575  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3576  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3577  ARMConstantPoolValue *CPV =
3578  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3579  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3580  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3581  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3582  Argument = DAG.getLoad(
3583  PtrVT, dl, DAG.getEntryNode(), Argument,
3584  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3585  SDValue Chain = Argument.getValue(1);
3586 
3587  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3588  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3589 
3590  // call __tls_get_addr.
3591  ArgListTy Args;
3592  ArgListEntry Entry;
3593  Entry.Node = Argument;
3594  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3595  Args.push_back(Entry);
3596 
3597  // FIXME: is there useful debug info available here?
3598  TargetLowering::CallLoweringInfo CLI(DAG);
3599  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3600  CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3601  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3602 
3603  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3604  return CallResult.first;
3605 }
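// Editor's sketch (not part of the original file): at the source level, the
// general-dynamic path above materializes the PIC address of a GOT slot
// describing the variable and hands it to __tls_get_addr in r0. The
// tls_index_sketch layout and the exact __tls_get_addr prototype shown here
// follow the usual ELF TLS convention and are assumptions, not definitions
// taken from this file.
struct tls_index_sketch {
  unsigned long ti_module; // module ID, filled in by the dynamic linker
  unsigned long ti_offset; // offset of the variable in that module's TLS block
};

extern "C" void *__tls_get_addr(tls_index_sketch *);

static void *generalDynamicSketch(tls_index_sketch *GOTEntry) {
  // The lowering above builds this call via LowerCallTo with CallingConv::C.
  return __tls_get_addr(GOTEntry);
}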
3606 
3607 // Lower ISD::GlobalTLSAddress using the "initial exec" or
3608 // "local exec" model.
3609 SDValue
3610 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3611  SelectionDAG &DAG,
3612  TLSModel::Model model) const {
3613  const GlobalValue *GV = GA->getGlobal();
3614  SDLoc dl(GA);
3615  SDValue Offset;
3616  SDValue Chain = DAG.getEntryNode();
3617  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3618  // Get the Thread Pointer
3619  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3620 
3621  if (model == TLSModel::InitialExec) {
3622  MachineFunction &MF = DAG.getMachineFunction();
3623  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3624  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3625  // Initial exec model.
3626  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3627  ARMConstantPoolValue *CPV =
3628  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3629  ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3630  true);
3631  Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3632  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3633  Offset = DAG.getLoad(
3634  PtrVT, dl, Chain, Offset,
3635  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3636  Chain = Offset.getValue(1);
3637 
3638  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3639  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3640 
3641  Offset = DAG.getLoad(
3642  PtrVT, dl, Chain, Offset,
3643  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3644  } else {
3645  // local exec model
3646  assert(model == TLSModel::LocalExec);
3647  ARMConstantPoolValue *CPV =
3648  ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3649  Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3650  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3651  Offset = DAG.getLoad(
3652  PtrVT, dl, Chain, Offset,
3653  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3654  }
3655 
3656  // The address of the thread-local variable is the sum of the thread
3657  // pointer and the variable's offset.
3658  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3659 }
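// Editor's sketch (not part of the original file): both exec models handled
// above end with the same add onto the thread pointer; they differ only in
// where the offset comes from. The function name and parameters below are
// illustrative assumptions.
static char *execModelSketch(char *ThreadPointer, const unsigned *GOTTPOFFSlot,
                             unsigned LocalExecTPOFF, bool InitialExec) {
  // Initial exec: the TP-relative offset is loaded from a GOT slot at runtime
  // (the extra load after the PIC_ADD above).
  // Local exec: the offset is a TPOFF constant resolved at link time.
  unsigned Offset = InitialExec ? *GOTTPOFFSlot : LocalExecTPOFF;
  return ThreadPointer + Offset;
}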
3660 
3661 SDValue
3662 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3663  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3664  if (DAG.getTarget().useEmulatedTLS())
3665  return LowerToTLSEmulatedModel(GA, DAG);
3666 
3667  if (Subtarget->isTargetDarwin())
3668  return LowerGlobalTLSAddressDarwin(Op, DAG);
3669 
3670  if (Subtarget->isTargetWindows())
3671  return LowerGlobalTLSAddressWindows(Op, DAG);
3672 
3673  // TODO: implement the "local dynamic" model
3674  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3675  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3676 
3677  switch (model) {
3678  case TLSModel::GeneralDynamic:
3679  case TLSModel::LocalDynamic:
3680  return LowerToTLSGeneralDynamicModel(GA, DAG);
3681  case TLSModel::InitialExec:
3682  case TLSModel::LocalExec:
3683  return LowerToTLSExecModels(GA, DAG, model);
3684  }
3685  llvm_unreachable("bogus TLS model");
3686 }
3687 
3688 /// Return true if all users of V are within function F, looking through
3689 /// ConstantExprs.
3690 static bool allUsersAreInFunction(const Value *V, const Function *F) {
3691  SmallVector<const User*,4> Worklist(V->users());
3692  while (!Worklist.empty()) {
3693  auto *U = Worklist.pop_back_val();
3694  if (isa<ConstantExpr>(U)) {
3695  append_range(Worklist, U->users());
3696  continue;
3697  }
3698 
3699  auto *I = dyn_cast<Instruction>(U);
3700  if (!I || I->getParent()->getParent() != F)
3701  return false;
3702  }
3703  return true;
3704 }
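// Editor's sketch (not part of the original file): the worklist pattern used
// above, restated with stand-in types rather than LLVM classes. A ConstantExpr
// user is never tested directly; it is expanded into its own users, so e.g. a
// constant GEP of V that appears in an instruction of F still counts as a use
// inside F.
#include <vector>

struct UseNodeSketch {
  bool IsConstantExpr = false;
  bool InTargetFunction = false;
  std::vector<UseNodeSketch *> Users;
};

static bool allUsersInFunctionSketch(const UseNodeSketch &V) {
  std::vector<const UseNodeSketch *> Worklist(V.Users.begin(), V.Users.end());
  while (!Worklist.empty()) {
    const UseNodeSketch *U = Worklist.back();
    Worklist.pop_back();
    if (U->IsConstantExpr) { // look through constant expressions
      Worklist.insert(Worklist.end(), U->Users.begin(), U->Users.end());
      continue;
    }
    if (!U->InTargetFunction) // a real user outside F disqualifies V
      return false;
  }
  return true;
}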
3705 
3706 static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3707  const GlobalValue *GV, SelectionDAG &DAG,
3708  EVT PtrVT, const SDLoc &dl) {
3709  // If we're creating a pool entry for a constant global with unnamed address,
3710  // and the global is small enough, we can emit it inline into the constant pool
3711  // to save ourselves an indirection.
3712  //
3713  // This is a win if the constant is only used in one function (so it doesn't
3714  // need to be duplicated) or duplicating the constant wouldn't increase code
3715  // size (implying the constant is no larger than 4 bytes).
3716  const Function &F = DAG.getMachineFunction().getFunction();
3717 
3718  // We rely on this decision to inline being idempotent and unrelated to the
3719  // use-site. We know that if we inline a variable at one use site, we'll
3720  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3721  // doesn't know about this optimization, so bail out if it's enabled;
3722  // otherwise we could decide to inline here (and thus never emit the GV)
3723  // while fast-isel generated code still requires the GV.
3724  if (!EnableConstpoolPromotion ||
3725  DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3726  return SDValue();
3727 
3728  auto *GVar = dyn_cast<GlobalVariable>(GV);
3729  if (!GVar || !GVar->hasInitializer() ||
3730  !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3731  !GVar->hasLocalLinkage())
3732  return SDValue();
3733 
3734  // If we inline a value that contains relocations, we move the relocations
3735  // from .data to .text. This is not allowed in position-independent code.
3736  auto *Init = GVar->getInitializer();
3737  if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3738  Init->needsDynamicRelocation())
3739  return SDValue();
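  // Editor's note (hypothetical example): under PIC or ROPI an initializer such
  // as
  //   static int X;
  //   static int *const P = &X; // P's initializer needs a dynamic relocation
  // is rejected by the check above, because inlining P into the constant pool
  // would move that relocation from .data into .text.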
3740 
3741  // The constant islands pass can only really deal with alignment requests
3742  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3743  // any global requiring an alignment greater than 4 bytes. We can also
3744  // only promote constants whose size is a multiple of 4 bytes or that can
3745  // be padded to a multiple of 4. Currently we only try to pad constants
3746  // that are strings, for simplicity.
3747  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3748  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3749  Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3750  unsigned RequiredPadding = 4 - (Size % 4);
3751  bool PaddingPossible =
3752  RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3753  if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3754  Size == 0)
3755  return SDValue();
3756 
3757  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
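  // Editor's note (worked example with illustrative sizes): for a 6-byte
  // string, RequiredPadding = 4 - (6 % 4) = 2, so PaddedSize = 8; for an
  // 8-byte constant, RequiredPadding computes to 4, which the expression above
  // treats as "no padding needed", leaving PaddedSize = 8.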
3758  MachineFunction &MF = DAG.getMachineFunction();
3759  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3760 
3761  // We can't bloat the constant pool too much, else the ConstantIslands pass