1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that ARM uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMISelLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMCallingConv.h"
18 #include "ARMConstantPoolValue.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMPerfectShuffle.h"
21 #include "ARMRegisterInfo.h"
22 #include "ARMSelectionDAGInfo.h"
23 #include "ARMSubtarget.h"
26 #include "Utils/ARMBaseInfo.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/BitVector.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/STLExtras.h"
33 #include "llvm/ADT/SmallPtrSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/Statistic.h"
36 #include "llvm/ADT/StringExtras.h"
37 #include "llvm/ADT/StringRef.h"
38 #include "llvm/ADT/StringSwitch.h"
39 #include "llvm/ADT/Triple.h"
40 #include "llvm/ADT/Twine.h"
64 #include "llvm/IR/Attributes.h"
65 #include "llvm/IR/CallingConv.h"
66 #include "llvm/IR/Constant.h"
67 #include "llvm/IR/Constants.h"
68 #include "llvm/IR/DataLayout.h"
69 #include "llvm/IR/DebugLoc.h"
70 #include "llvm/IR/DerivedTypes.h"
71 #include "llvm/IR/Function.h"
72 #include "llvm/IR/GlobalAlias.h"
73 #include "llvm/IR/GlobalValue.h"
74 #include "llvm/IR/GlobalVariable.h"
75 #include "llvm/IR/IRBuilder.h"
76 #include "llvm/IR/InlineAsm.h"
77 #include "llvm/IR/Instruction.h"
78 #include "llvm/IR/Instructions.h"
79 #include "llvm/IR/IntrinsicInst.h"
80 #include "llvm/IR/Intrinsics.h"
81 #include "llvm/IR/Module.h"
82 #include "llvm/IR/PatternMatch.h"
83 #include "llvm/IR/Type.h"
84 #include "llvm/IR/User.h"
85 #include "llvm/IR/Value.h"
86 #include "llvm/MC/MCInstrDesc.h"
88 #include "llvm/MC/MCRegisterInfo.h"
89 #include "llvm/MC/MCSchedule.h"
92 #include "llvm/Support/Casting.h"
93 #include "llvm/Support/CodeGen.h"
95 #include "llvm/Support/Compiler.h"
96 #include "llvm/Support/Debug.h"
98 #include "llvm/Support/KnownBits.h"
100 #include "llvm/Support/MathExtras.h"
104 #include <algorithm>
105 #include <cassert>
106 #include <cstdint>
107 #include <cstdlib>
108 #include <iterator>
109 #include <limits>
110 #include <string>
111 #include <tuple>
112 #include <utility>
113 #include <vector>
114 
115 using namespace llvm;
116 using namespace llvm::PatternMatch;
117 
118 #define DEBUG_TYPE "arm-isel"
119 
120 STATISTIC(NumTailCalls, "Number of tail calls");
121 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
122 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
123 STATISTIC(NumConstpoolPromoted,
124  "Number of constants with their storage promoted into constant pools");
125 
126 static cl::opt<bool>
127 ARMInterworking("arm-interworking", cl::Hidden,
128  cl::desc("Enable / disable ARM interworking (for debugging only)"),
129  cl::init(true));
130 
131 static cl::opt<bool> EnableConstpoolPromotion(
132  "arm-promote-constant", cl::Hidden,
133  cl::desc("Enable / disable promotion of unnamed_addr constants into "
134  "constant pools"),
135  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
136 static cl::opt<unsigned> ConstpoolPromotionMaxSize(
137  "arm-promote-constant-max-size", cl::Hidden,
138  cl::desc("Maximum size of constant to promote into a constant pool"),
139  cl::init(64));
140 static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
141  "arm-promote-constant-max-total", cl::Hidden,
142  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
143  cl::init(128));
144 
145 // The APCS parameter registers.
146 static const MCPhysReg GPRArgRegs[] = {
147  ARM::R0, ARM::R1, ARM::R2, ARM::R3
148 };
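// Illustrative note (not from the original source): under the APCS/AAPCS the
// first four word-sized arguments are passed in r0-r3 and the remainder on
// the stack. For example, for
//
//   int f(int a, int b, int c, int d, int e);
//
// a..d arrive in r0..r3 and e is read from the caller's stack frame.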
149 
150 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
151  MVT PromotedBitwiseVT) {
152  if (VT != PromotedLdStVT) {
153  setOperationAction(ISD::LOAD, VT, Promote);
154  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
155 
156  setOperationAction(ISD::STORE, VT, Promote);
157  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
158  }
159 
160  MVT ElemTy = VT.getVectorElementType();
161  if (ElemTy != MVT::f64)
162  setOperationAction(ISD::SETCC, VT, Custom);
163  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
164  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
165  if (ElemTy == MVT::i32) {
166  setOperationAction(ISD::SINT_TO_FP, VT, Custom);
167  setOperationAction(ISD::UINT_TO_FP, VT, Custom);
168  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
169  setOperationAction(ISD::FP_TO_UINT, VT, Custom);
170  } else {
171  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
172  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
173  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
174  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
175  }
176  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
177  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
178  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
179  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
180  setOperationAction(ISD::SELECT, VT, Expand);
181  setOperationAction(ISD::SELECT_CC, VT, Expand);
182  setOperationAction(ISD::VSELECT, VT, Expand);
183  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
184  if (VT.isInteger()) {
185  setOperationAction(ISD::SHL, VT, Custom);
186  setOperationAction(ISD::SRA, VT, Custom);
187  setOperationAction(ISD::SRL, VT, Custom);
188  }
189 
190  // Promote all bit-wise operations.
191  if (VT.isInteger() && VT != PromotedBitwiseVT) {
192  setOperationAction(ISD::AND, VT, Promote);
193  AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
194  setOperationAction(ISD::OR, VT, Promote);
195  AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
196  setOperationAction(ISD::XOR, VT, Promote);
197  AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
198  }
199 
200  // Neon does not support vector divide/remainder operations.
201  setOperationAction(ISD::SDIV, VT, Expand);
202  setOperationAction(ISD::UDIV, VT, Expand);
203  setOperationAction(ISD::FDIV, VT, Expand);
204  setOperationAction(ISD::SREM, VT, Expand);
205  setOperationAction(ISD::UREM, VT, Expand);
206  setOperationAction(ISD::FREM, VT, Expand);
207 
208  if (!VT.isFloatingPoint() &&
209  VT != MVT::v2i64 && VT != MVT::v1i64)
210  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
211  setOperationAction(Opcode, VT, Legal);
212 }
213 
214 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
215  addRegisterClass(VT, &ARM::DPRRegClass);
216  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
217 }
218 
219 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
220  addRegisterClass(VT, &ARM::DPairRegClass);
221  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
222 }
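// Sketch of the split between the two helpers above (editorial note): D
// registers are 64 bits wide and Q registers are 128 bits, so 64-bit vector
// types go to the DPR class and 128-bit vector types to the D-pair class:
//
//   addDRTypeForNEON(MVT::v8i8);   // 8 x i8  = 64 bits  -> D registers
//   addQRTypeForNEON(MVT::v4i32);  // 4 x i32 = 128 bits -> Q registers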
223 
224 void ARMTargetLowering::setAllExpand(MVT VT) {
225  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
226  setOperationAction(Opc, VT, Expand);
227 
228  // We support these really simple operations even on types where all
229  // the actual arithmetic has to be broken down into simpler
230  // operations or turned into library calls.
231  setOperationAction(ISD::BITCAST, VT, Legal);
232  setOperationAction(ISD::LOAD, VT, Legal);
233  setOperationAction(ISD::STORE, VT, Legal);
234  setOperationAction(ISD::UNDEF, VT, Legal);
235 }
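// Example of the effect (editorial note): after setAllExpand(MVT::f64) on a
// single-precision-only FPU, an f64 multiply is broken down or turned into a
// library call, while copying an f64 value around is still selected as plain
// moves/loads/stores because BITCAST, LOAD, STORE and UNDEF stay Legal.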
236 
237 void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
238  LegalizeAction Action) {
239  setLoadExtAction(ISD::EXTLOAD, From, To, Action);
240  setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
241  setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
242 }
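// Illustrative note: the three flavours registered above correspond to the
// load-extension forms that can be folded into a load, e.g. for v8i8 -> v8i16:
//
//   %w  = load <8 x i8>, <8 x i8>* %p
//   %sx = sext <8 x i8> %w to <8 x i16>   ; folds to a SEXTLOAD
//   %zx = zext <8 x i8> %w to <8 x i16>   ; folds to a ZEXTLOAD
//
// EXTLOAD is the any-extend form where the high bits are unspecified.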
243 
244 void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
245  const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
246 
247  for (auto VT : IntTypes) {
248  addRegisterClass(VT, &ARM::MQPRRegClass);
249  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
250  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
251  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
252  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
253  setOperationAction(ISD::SHL, VT, Custom);
254  setOperationAction(ISD::SRA, VT, Custom);
255  setOperationAction(ISD::SRL, VT, Custom);
256  setOperationAction(ISD::SMIN, VT, Legal);
257  setOperationAction(ISD::SMAX, VT, Legal);
258  setOperationAction(ISD::UMIN, VT, Legal);
259  setOperationAction(ISD::UMAX, VT, Legal);
260  setOperationAction(ISD::ABS, VT, Legal);
261  setOperationAction(ISD::SETCC, VT, Custom);
262  setOperationAction(ISD::MLOAD, VT, Custom);
263  setOperationAction(ISD::MSTORE, VT, Legal);
264  setOperationAction(ISD::CTLZ, VT, Legal);
265  setOperationAction(ISD::CTTZ, VT, Expand);
266  setOperationAction(ISD::BITREVERSE, VT, Legal);
267  setOperationAction(ISD::BSWAP, VT, Legal);
268 
269  // No native support for these.
270  setOperationAction(ISD::UDIV, VT, Expand);
271  setOperationAction(ISD::SDIV, VT, Expand);
272  setOperationAction(ISD::UREM, VT, Expand);
273  setOperationAction(ISD::SREM, VT, Expand);
274  setOperationAction(ISD::CTPOP, VT, Expand);
275 
276  // Vector reductions
277  setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
278  setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
279  setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
280  setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
281  setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
282 
283  if (!HasMVEFP) {
284  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
285  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
286  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
287  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
288  }
289 
290  // Pre and Post inc are supported on loads and stores; see the note below.
291  for (unsigned im = (unsigned)ISD::PRE_INC;
292  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
293  setIndexedLoadAction(im, VT, Legal);
294  setIndexedStoreAction(im, VT, Legal);
295  }
296  }
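// Note on indexed addressing (editorial, not from the original source): "pre"
// and "post" increment refer to addressing modes where the base pointer is
// updated as part of the memory access, roughly
//
//   vldrw.u32 q0, [r0, #16]!   ; pre-indexed:  r0 += 16, then load from r0
//   vldrw.u32 q0, [r0], #16    ; post-indexed: load from r0, then r0 += 16
//
// Marking the indexed actions Legal lets DAG combining fold the pointer
// update into the load/store instead of emitting a separate add.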
297 
298  const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
299  for (auto VT : FloatTypes) {
300  addRegisterClass(VT, &ARM::MQPRRegClass);
301  if (!HasMVEFP)
302  setAllExpand(VT);
303 
304  // These are legal or custom whether we have MVE.fp or not
305  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
306  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
307  setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
308  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
309  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
310  setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
311  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
312  setOperationAction(ISD::SETCC, VT, Custom);
313  setOperationAction(ISD::MLOAD, VT, Custom);
314  setOperationAction(ISD::MSTORE, VT, Legal);
315 
316  // Pre and Post inc are supported on loads and stores
317  for (unsigned im = (unsigned)ISD::PRE_INC;
318  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
319  setIndexedLoadAction(im, VT, Legal);
320  setIndexedStoreAction(im, VT, Legal);
321  }
322 
323  if (HasMVEFP) {
324  setOperationAction(ISD::FMINNUM, VT, Legal);
325  setOperationAction(ISD::FMAXNUM, VT, Legal);
326  setOperationAction(ISD::FROUND, VT, Legal);
327 
328  // No native support for these.
329  setOperationAction(ISD::FDIV, VT, Expand);
330  setOperationAction(ISD::FREM, VT, Expand);
331  setOperationAction(ISD::FSQRT, VT, Expand);
332  setOperationAction(ISD::FSIN, VT, Expand);
333  setOperationAction(ISD::FCOS, VT, Expand);
334  setOperationAction(ISD::FPOW, VT, Expand);
335  setOperationAction(ISD::FLOG, VT, Expand);
336  setOperationAction(ISD::FLOG2, VT, Expand);
337  setOperationAction(ISD::FLOG10, VT, Expand);
338  setOperationAction(ISD::FEXP, VT, Expand);
339  setOperationAction(ISD::FEXP2, VT, Expand);
340  setOperationAction(ISD::FNEARBYINT, VT, Expand);
341  }
342  }
343 
344  // We 'support' these types up to bitcast/load/store level, regardless of
345  // MVE integer-only / float support. Only FP data processing on the FP
346  // vector types is inhibited at the integer-only level.
347  const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
348  for (auto VT : LongTypes) {
349  addRegisterClass(VT, &ARM::MQPRRegClass);
350  setAllExpand(VT);
351  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
352  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
353  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
354  }
355  // We can do bitwise operations on v2i64 vectors
356  setOperationAction(ISD::AND, MVT::v2i64, Legal);
357  setOperationAction(ISD::OR, MVT::v2i64, Legal);
358  setOperationAction(ISD::XOR, MVT::v2i64, Legal);
359 
360  // It is legal to extload from v4i8 to v4i16 or v4i32.
361  addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
362  addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
363  addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
364 
365  // Some truncating stores are legal too.
366  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
367  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
368  setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
369 
370  // Pre and Post inc on these are legal, given the correct extends
371  for (unsigned im = (unsigned)ISD::PRE_INC;
372  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
373  setIndexedLoadAction(im, MVT::v8i8, Legal);
374  setIndexedStoreAction(im, MVT::v8i8, Legal);
375  setIndexedLoadAction(im, MVT::v4i8, Legal);
376  setIndexedStoreAction(im, MVT::v4i8, Legal);
377  setIndexedLoadAction(im, MVT::v4i16, Legal);
378  setIndexedStoreAction(im, MVT::v4i16, Legal);
379  }
380 
381  // Predicate types
382  const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1};
383  for (auto VT : pTypes) {
384  addRegisterClass(VT, &ARM::VCCRRegClass);
385  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
386  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
387  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
388  setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
389  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
390  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
391  setOperationAction(ISD::SETCC, VT, Custom);
392  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
393  setOperationAction(ISD::LOAD, VT, Custom);
394  setOperationAction(ISD::STORE, VT, Custom);
395  }
396 }
397 
398 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
399  const ARMSubtarget &STI)
400  : TargetLowering(TM), Subtarget(&STI) {
401  RegInfo = Subtarget->getRegisterInfo();
402  Itins = Subtarget->getInstrItineraryData();
403 
406 
407  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
408  !Subtarget->isTargetWatchOS()) {
409  bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
410  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
411  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
412  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
413  : CallingConv::ARM_AAPCS);
414  }
415 
416  if (Subtarget->isTargetMachO()) {
417  // Uses VFP for Thumb libfuncs if available.
418  if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
419  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
420  static const struct {
421  const RTLIB::Libcall Op;
422  const char * const Name;
423  const ISD::CondCode Cond;
424  } LibraryCalls[] = {
425  // Single-precision floating-point arithmetic.
426  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
427  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
428  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
429  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
430 
431  // Double-precision floating-point arithmetic.
432  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
433  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
434  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
435  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
436 
437  // Single-precision comparisons.
438  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
439  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
440  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
441  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
442  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
443  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
444  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
445  { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
446 
447  // Double-precision comparisons.
448  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
449  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
450  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
451  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
452  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
453  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
454  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
455  { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
456 
457  // Floating-point to integer conversions.
458  // i64 conversions are done via library routines even when generating VFP
459  // instructions, so use the same ones.
460  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
461  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
462  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
463  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
464 
465  // Conversions between floating types.
466  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
467  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
468 
469  // Integer to floating-point conversions.
470  // i64 conversions are done via library routines even when generating VFP
471  // instructions, so use the same ones.
472  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
473  // e.g., __floatunsidf vs. __floatunssidfvfp.
474  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
475  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
476  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
477  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
478  };
479 
480  for (const auto &LC : LibraryCalls) {
481  setLibcallName(LC.Op, LC.Name);
482  if (LC.Cond != ISD::SETCC_INVALID)
483  setCmpLibcallCC(LC.Op, LC.Cond);
484  }
485  }
486  }
487 
488  // These libcalls are not available on 32-bit targets.
489  setLibcallName(RTLIB::SHL_I128, nullptr);
490  setLibcallName(RTLIB::SRL_I128, nullptr);
491  setLibcallName(RTLIB::SRA_I128, nullptr);
492 
493  // RTLIB
494  if (Subtarget->isAAPCS_ABI() &&
495  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
496  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
497  static const struct {
498  const RTLIB::Libcall Op;
499  const char * const Name;
500  const CallingConv::ID CC;
501  const ISD::CondCode Cond;
502  } LibraryCalls[] = {
503  // Double-precision floating-point arithmetic helper functions
504  // RTABI chapter 4.1.2, Table 2
505  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
506  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
507  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
508  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
509 
510  // Double-precision floating-point comparison helper functions
511  // RTABI chapter 4.1.2, Table 3
512  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
513  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
514  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
515  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
516  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
517  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
518  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
519  { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
520 
521  // Single-precision floating-point arithmetic helper functions
522  // RTABI chapter 4.1.2, Table 4
523  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
524  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
525  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
526  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
527 
528  // Single-precision floating-point comparison helper functions
529  // RTABI chapter 4.1.2, Table 5
530  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
531  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
532  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
533  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
534  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
535  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
536  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
537  { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
538 
539  // Floating-point to integer conversions.
540  // RTABI chapter 4.1.2, Table 6
541  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
542  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
543  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
544  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
545  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
546  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
547  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
548  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
549 
550  // Conversions between floating types.
551  // RTABI chapter 4.1.2, Table 7
552  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
553  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
554  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
555 
556  // Integer to floating-point conversions.
557  // RTABI chapter 4.1.2, Table 8
558  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
559  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
560  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
561  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
562  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
563  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
564  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
565  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
566 
567  // Long long helper functions
568  // RTABI chapter 4.2, Table 9
569  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
570  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
571  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
572  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
573 
574  // Integer division functions
575  // RTABI chapter 4.3.1
576  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
577  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
578  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
579  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
580  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
581  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
582  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
583  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
584  };
585 
586  for (const auto &LC : LibraryCalls) {
587  setLibcallName(LC.Op, LC.Name);
588  setLibcallCallingConv(LC.Op, LC.CC);
589  if (LC.Cond != ISD::SETCC_INVALID)
590  setCmpLibcallCC(LC.Op, LC.Cond);
591  }
592 
593  // EABI dependent RTLIB
594  if (TM.Options.EABIVersion == EABI::EABI4 ||
595  TM.Options.EABIVersion == EABI::EABI5) {
596  static const struct {
597  const RTLIB::Libcall Op;
598  const char *const Name;
599  const CallingConv::ID CC;
600  const ISD::CondCode Cond;
601  } MemOpsLibraryCalls[] = {
602  // Memory operations
603  // RTABI chapter 4.3.4
604  { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
605  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
606  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
607  };
608 
609  for (const auto &LC : MemOpsLibraryCalls) {
610  setLibcallName(LC.Op, LC.Name);
611  setLibcallCallingConv(LC.Op, LC.CC);
612  if (LC.Cond != ISD::SETCC_INVALID)
613  setCmpLibcallCC(LC.Op, LC.Cond);
614  }
615  }
616  }
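// Illustrative example of what the RTABI table above achieves (editorial
// note): on a soft-float AEABI target a plain C double addition,
//
//   double add(double a, double b) { return a + b; }
//
// is legalized into a call to the helper registered for RTLIB::ADD_F64,
// roughly "bl __aeabi_dadd", with the operands passed per ARM_AAPCS.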
617 
618  if (Subtarget->isTargetWindows()) {
619  static const struct {
620  const RTLIB::Libcall Op;
621  const char * const Name;
622  const CallingConv::ID CC;
623  } LibraryCalls[] = {
624  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
625  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
626  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
627  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
628  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
629  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
630  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
631  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
632  };
633 
634  for (const auto &LC : LibraryCalls) {
635  setLibcallName(LC.Op, LC.Name);
636  setLibcallCallingConv(LC.Op, LC.CC);
637  }
638  }
639 
640  // Use divmod compiler-rt calls for iOS 5.0 and later.
641  if (Subtarget->isTargetMachO() &&
642  !(Subtarget->isTargetIOS() &&
643  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
644  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
645  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
646  }
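// For reference (assumption based on compiler-rt's interface, not stated in
// this file): __divmodsi4 returns the quotient and stores the remainder,
// roughly
//
//   int __divmodsi4(int a, int b, int *rem);   // returns a/b, *rem = a%b
//
// so a combined i32 sdiv+srem can be lowered to one runtime call here.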
647 
648  // The half <-> float conversion functions are always soft-float on
649  // non-watchos platforms, but are needed for some targets which use a
650  // hard-float calling convention by default.
651  if (!Subtarget->isTargetWatchABI()) {
652  if (Subtarget->isAAPCS_ABI()) {
653  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
654  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
655  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
656  } else {
657  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
658  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
659  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
660  }
661  }
662 
663  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
664  // a __gnu_ prefix (which is the default).
665  if (Subtarget->isTargetAEABI()) {
666  static const struct {
667  const RTLIB::Libcall Op;
668  const char * const Name;
669  const CallingConv::ID CC;
670  } LibraryCalls[] = {
671  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
672  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
673  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
674  };
675 
676  for (const auto &LC : LibraryCalls) {
677  setLibcallName(LC.Op, LC.Name);
678  setLibcallCallingConv(LC.Op, LC.CC);
679  }
680  }
681 
682  if (Subtarget->isThumb1Only())
683  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
684  else
685  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
686 
687  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
688  Subtarget->hasFPRegs()) {
689  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
690  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
691  if (!Subtarget->hasVFP2Base())
692  setAllExpand(MVT::f32);
693  if (!Subtarget->hasFP64())
694  setAllExpand(MVT::f64);
695  }
696 
697  if (Subtarget->hasFullFP16()) {
698  addRegisterClass(MVT::f16, &ARM::HPRRegClass);
702 
705  }
706 
707  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
708  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
709  setTruncStoreAction(VT, InnerVT, Expand);
710  addAllExtLoads(VT, InnerVT, Expand);
711  }
712 
717 
719  }
720 
723 
726 
727  if (Subtarget->hasMVEIntegerOps())
728  addMVEVectorTypes(Subtarget->hasMVEFloatOps());
729 
730  // Combine low-overhead loop intrinsics so that we can lower i1 types.
731  if (Subtarget->hasLOB()) {
734  }
735 
736  if (Subtarget->hasNEON()) {
737  addDRTypeForNEON(MVT::v2f32);
738  addDRTypeForNEON(MVT::v8i8);
739  addDRTypeForNEON(MVT::v4i16);
740  addDRTypeForNEON(MVT::v2i32);
741  addDRTypeForNEON(MVT::v1i64);
742 
743  addQRTypeForNEON(MVT::v4f32);
744  addQRTypeForNEON(MVT::v2f64);
745  addQRTypeForNEON(MVT::v16i8);
746  addQRTypeForNEON(MVT::v8i16);
747  addQRTypeForNEON(MVT::v4i32);
748  addQRTypeForNEON(MVT::v2i64);
749 
750  if (Subtarget->hasFullFP16()) {
751  addQRTypeForNEON(MVT::v8f16);
752  addDRTypeForNEON(MVT::v4f16);
753  }
754  }
755 
756  if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
757  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
758  // none of Neon, MVE or VFP supports any arithmetic operations on it.
762  // FIXME: Code duplication: FDIV and FREM are expanded always, see
763  // ARMTargetLowering::addTypeForNEON method for details.
766  // FIXME: Create unittest.
767  // In other words, find a case where "copysign" appears in the DAG with
768  // vector operands.
770  // FIXME: Code duplication: SETCC has custom operation action, see
771  // ARMTargetLowering::addTypeForNEON method for details.
773  // FIXME: Create unittest for FNEG and for FABS.
785  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
792  }
793 
794  if (Subtarget->hasNEON()) {
795  // The same applies to v4f32, but keep in mind that vadd, vsub and vmul are
796  // natively supported for v4f32.
811 
812  // Mark v2f32 intrinsics.
827 
828  // Neon does not support some operations on v1i64 and v2i64 types.
830  // Custom handling for some quad-vector types to detect VMULL.
834  // Custom handling for some vector types to avoid expensive expansions
839  // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
840  // a destination type that is wider than the source, nor does
841  // it have a FP_TO_[SU]INT instruction with a narrower destination than
842  // source.
851 
854 
855  // NEON does not have a single-instruction CTPOP for vectors with element
856  // types wider than 8 bits. However, custom lowering can leverage the
857  // v8i8/v16i8 vcnt instruction.
864 
867 
868  // NEON does not have single instruction CTTZ for vectors.
873 
878 
883 
888 
889  // NEON only has FMA instructions as of VFP4.
890  if (!Subtarget->hasVFP4Base()) {
893  }
894 
909 
910  // It is legal to extload from v4i8 to v4i16 or v4i32.
912  MVT::v2i32}) {
917  }
918  }
919  }
920 
921  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
925  }
926 
927  if (!Subtarget->hasFP64()) {
928  // When targeting a floating-point unit with only single-precision
929  // operations, f64 is legal for the few double-precision instructions which
930  // are present. However, no double-precision operations other than moves,
931  // loads and stores are provided by the hardware.
963  }
964 
965  if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
967  if (Subtarget->hasFullFP16())
969  }
970 
971  if (!Subtarget->hasFP16())
973 
974  if (!Subtarget->hasFP64())
976 
978 
979  // ARM does not have floating-point extending loads.
980  for (MVT VT : MVT::fp_valuetypes()) {
983  }
984 
985  // ... or truncating stores
989 
990  // ARM does not have i1 sign extending load.
991  for (MVT VT : MVT::integer_valuetypes())
992  setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
993 
994  // ARM supports all 4 flavors of integer indexed load / store.
995  if (!Subtarget->isThumb1Only()) {
996  for (unsigned im = (unsigned)ISD::PRE_INC;
997  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1006  }
1007  } else {
1008  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1011  }
1012 
1017 
1020 
1021  // i64 operation support.
1024  if (Subtarget->isThumb1Only()) {
1027  }
1028  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1029  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1031 
1039 
1040  // MVE lowers 64-bit shifts to lsll and lsrl,
1041  // assuming that ISD::SRL and SRA of i64 are already marked custom.
1042  if (Subtarget->hasMVEIntegerOps())
1043  setOperationAction(ISD::SHL, MVT::i64, Custom);
1044 
1045  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1046  if (Subtarget->isThumb1Only()) {
1050  }
1051 
1052  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1054 
1055  // ARM does not have ROTL.
1057  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1060  }
1063  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1066  }
1067 
1068  // @llvm.readcyclecounter requires the Performance Monitors extension.
1069  // Default to the 0 expansion on unsupported platforms.
1070  // FIXME: Technically there are older ARM CPUs that have
1071  // implementation-specific ways of obtaining this information.
1072  if (Subtarget->hasPerfMon())
1074 
1075  // Only ARMv6 has BSWAP.
1076  if (!Subtarget->hasV6Ops())
1077  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
1078 
1079  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1080  : Subtarget->hasDivideInARMMode();
1081  if (!hasDivide) {
1082  // These are expanded into libcalls if the cpu doesn't have HW divider.
1085  }
1086 
1087  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1090 
1093  }
1094 
1097 
1098  // Register based DivRem for AEABI (RTABI 4.2)
1099  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1100  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1101  Subtarget->isTargetWindows()) {
1104  HasStandaloneRem = false;
1105 
1106  if (Subtarget->isTargetWindows()) {
1107  const struct {
1108  const RTLIB::Libcall Op;
1109  const char * const Name;
1110  const CallingConv::ID CC;
1111  } LibraryCalls[] = {
1112  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1113  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1114  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1115  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1116 
1117  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1118  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1119  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1120  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1121  };
1122 
1123  for (const auto &LC : LibraryCalls) {
1124  setLibcallName(LC.Op, LC.Name);
1125  setLibcallCallingConv(LC.Op, LC.CC);
1126  }
1127  } else {
1128  const struct {
1129  const RTLIB::Libcall Op;
1130  const char * const Name;
1131  const CallingConv::ID CC;
1132  } LibraryCalls[] = {
1133  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1134  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1135  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1136  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1137 
1138  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1139  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1140  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1141  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1142  };
1143 
1144  for (const auto &LC : LibraryCalls) {
1145  setLibcallName(LC.Op, LC.Name);
1146  setLibcallCallingConv(LC.Op, LC.CC);
1147  }
1148  }
1149 
1154  } else {
1157  }
1158 
1159  if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
1160  for (auto &VT : {MVT::f32, MVT::f64})
1162 
1167 
1170 
1171  // Use the default implementation.
1178 
1179  if (Subtarget->isTargetWindows())
1181  else
1183 
1184  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1185  // the default expansion.
1186  InsertFencesForAtomic = false;
1187  if (Subtarget->hasAnyDataBarrier() &&
1188  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1189  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1190  // to ldrex/strex loops already.
1192  if (!Subtarget->isThumb() || !Subtarget->isMClass())
1194 
1195  // On v8, we have particularly efficient implementations of atomic fences
1196  // if they can be combined with nearby atomic loads and stores.
1197  if (!Subtarget->hasAcquireRelease() ||
1198  getTargetMachine().getOptLevel() == 0) {
1199  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1200  InsertFencesForAtomic = true;
1201  }
1202  } else {
1203  // If there's anything we can use as a barrier, go through custom lowering
1204  // for ATOMIC_FENCE.
1205  // If target has DMB in thumb, Fences can be inserted.
1206  if (Subtarget->hasDataBarrier())
1207  InsertFencesForAtomic = true;
1208 
1210  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1211 
1212  // Set them all for expansion, which will force libcalls.
1225  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1226  // Unordered/Monotonic case.
1227  if (!InsertFencesForAtomic) {
1230  }
1231  }
1232 
1234 
1235  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1236  if (!Subtarget->hasV6Ops()) {
1239  }
1241 
1242  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1243  !Subtarget->isThumb1Only()) {
1244  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1245  // iff target supports vfp2.
1248  }
1249 
1250  // We want to custom lower some of our intrinsics.
1255  if (Subtarget->useSjLjEH())
1256  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1257 
1267  if (Subtarget->hasFullFP16()) {
1271  }
1272 
1274 
1277  if (Subtarget->hasFullFP16())
1282 
1283  // We don't support sin/cos/fmod/copysign/pow
1292  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1293  !Subtarget->isThumb1Only()) {
1296  }
1299 
1300  if (!Subtarget->hasVFP4Base()) {
1303  }
1304 
1305  // Various VFP goodness
1306  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1307  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1308  if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1311  }
1312 
1313  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1314  if (!Subtarget->hasFP16()) {
1317  }
1318  }
1319 
1320  // Use __sincos_stret if available.
1321  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1322  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1325  }
1326 
1327  // FP-ARMv8 implements a lot of rounding-like FP operations.
1328  if (Subtarget->hasFPARMv8Base()) {
1337  if (Subtarget->hasNEON()) {
1342  }
1343 
1344  if (Subtarget->hasFP64()) {
1353  }
1354  }
1355 
1356  // FP16 often needs to be promoted to call lib functions.
1357  if (Subtarget->hasFullFP16()) {
1370 
1372  }
1373 
1374  if (Subtarget->hasNEON()) {
1375  // vmin and vmax aren't available in a scalar form, so we use
1376  // a NEON instruction with an undef lane instead.
1385 
1386  if (Subtarget->hasFullFP16()) {
1391 
1396  }
1397  }
1398 
1399  // We have target-specific dag combine patterns for the following nodes:
1400  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1407 
1408  if (Subtarget->hasV6Ops())
1410  if (Subtarget->isThumb1Only())
1412 
1414 
1415  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1416  !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1418  else
1420 
1421  //// temporary - rewrite interface to use type
1422  MaxStoresPerMemset = 8;
1424  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1426  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1428 
1429  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1430  // are at least 4 bytes aligned.
1432 
1433  // Prefer likely predicted branches to selects on out-of-order cores.
1434  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1435 
1436  setPrefLoopAlignment(
1437  llvm::Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1438 
1439  setMinFunctionAlignment(Subtarget->isThumb() ? llvm::Align(2)
1440  : llvm::Align(4));
1441 
1442  if (Subtarget->isThumb() || Subtarget->isThumb2())
1444 }
1445 
1446 bool ARMTargetLowering::useSoftFloat() const {
1447  return Subtarget->useSoftFloat();
1448 }
1449 
1450 // FIXME: It might make sense to define the representative register class as the
1451 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1452 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1453 // SPR's representative would be DPR_VFP2. This should work well if register
1454 // pressure tracking were modified such that a register use would increment the
1455 // pressure of the register class's representative and all of its super
1456 // classes' representatives transitively. We have not implemented this because
1457 // of the difficulty prior to coalescing of modeling operand register classes
1458 // due to the common occurrence of cross class copies and subregister insertions
1459 // and extractions.
1460 std::pair<const TargetRegisterClass *, uint8_t>
1461 ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1462  MVT VT) const {
1463  const TargetRegisterClass *RRC = nullptr;
1464  uint8_t Cost = 1;
1465  switch (VT.SimpleTy) {
1466  default:
1468  // Use DPR as representative register class for all floating point
1469  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1470  // the cost is 1 for both f32 and f64.
1471  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1472  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1473  RRC = &ARM::DPRRegClass;
1474  // When NEON is used for SP, only half of the register file is available
1475  // because operations that define both SP and DP results will be constrained
1476  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1477  // coalescing by double-counting the SP regs. See the FIXME above.
1478  if (Subtarget->useNEONForSinglePrecisionFP())
1479  Cost = 2;
1480  break;
1481  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1482  case MVT::v4f32: case MVT::v2f64:
1483  RRC = &ARM::DPRRegClass;
1484  Cost = 2;
1485  break;
1486  case MVT::v4i64:
1487  RRC = &ARM::DPRRegClass;
1488  Cost = 4;
1489  break;
1490  case MVT::v8i64:
1491  RRC = &ARM::DPRRegClass;
1492  Cost = 8;
1493  break;
1494  }
1495  return std::make_pair(RRC, Cost);
1496 }
1497 
1498 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1499  switch ((ARMISD::NodeType)Opcode) {
1500  case ARMISD::FIRST_NUMBER: break;
1501  case ARMISD::Wrapper: return "ARMISD::Wrapper";
1502  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1503  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1504  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1505  case ARMISD::CALL: return "ARMISD::CALL";
1506  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1507  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1508  case ARMISD::BRCOND: return "ARMISD::BRCOND";
1509  case ARMISD::BR_JT: return "ARMISD::BR_JT";
1510  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1511  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1512  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1513  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1514  case ARMISD::CMP: return "ARMISD::CMP";
1515  case ARMISD::CMN: return "ARMISD::CMN";
1516  case ARMISD::CMPZ: return "ARMISD::CMPZ";
1517  case ARMISD::CMPFP: return "ARMISD::CMPFP";
1518  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1519  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1520  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1521 
1522  case ARMISD::CMOV: return "ARMISD::CMOV";
1523  case ARMISD::SUBS: return "ARMISD::SUBS";
1524 
1525  case ARMISD::SSAT: return "ARMISD::SSAT";
1526  case ARMISD::USAT: return "ARMISD::USAT";
1527 
1528  case ARMISD::ASRL: return "ARMISD::ASRL";
1529  case ARMISD::LSRL: return "ARMISD::LSRL";
1530  case ARMISD::LSLL: return "ARMISD::LSLL";
1531 
1532  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1533  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1534  case ARMISD::RRX: return "ARMISD::RRX";
1535 
1536  case ARMISD::ADDC: return "ARMISD::ADDC";
1537  case ARMISD::ADDE: return "ARMISD::ADDE";
1538  case ARMISD::SUBC: return "ARMISD::SUBC";
1539  case ARMISD::SUBE: return "ARMISD::SUBE";
1540  case ARMISD::LSLS: return "ARMISD::LSLS";
1541 
1542  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1543  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1544  case ARMISD::VMOVhr: return "ARMISD::VMOVhr";
1545  case ARMISD::VMOVrh: return "ARMISD::VMOVrh";
1546  case ARMISD::VMOVSR: return "ARMISD::VMOVSR";
1547 
1548  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1549  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1550  case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1551 
1552  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1553 
1554  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1555 
1556  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1557 
1558  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1559 
1560  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1561 
1562  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1563  case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1564 
1565  case ARMISD::PREDICATE_CAST: return "ARMISD::PREDICATE_CAST";
1566  case ARMISD::VCMP: return "ARMISD::VCMP";
1567  case ARMISD::VCMPZ: return "ARMISD::VCMPZ";
1568  case ARMISD::VTST: return "ARMISD::VTST";
1569 
1570  case ARMISD::VSHLs: return "ARMISD::VSHLs";
1571  case ARMISD::VSHLu: return "ARMISD::VSHLu";
1572  case ARMISD::VSHLIMM: return "ARMISD::VSHLIMM";
1573  case ARMISD::VSHRsIMM: return "ARMISD::VSHRsIMM";
1574  case ARMISD::VSHRuIMM: return "ARMISD::VSHRuIMM";
1575  case ARMISD::VRSHRsIMM: return "ARMISD::VRSHRsIMM";
1576  case ARMISD::VRSHRuIMM: return "ARMISD::VRSHRuIMM";
1577  case ARMISD::VRSHRNIMM: return "ARMISD::VRSHRNIMM";
1578  case ARMISD::VQSHLsIMM: return "ARMISD::VQSHLsIMM";
1579  case ARMISD::VQSHLuIMM: return "ARMISD::VQSHLuIMM";
1580  case ARMISD::VQSHLsuIMM: return "ARMISD::VQSHLsuIMM";
1581  case ARMISD::VQSHRNsIMM: return "ARMISD::VQSHRNsIMM";
1582  case ARMISD::VQSHRNuIMM: return "ARMISD::VQSHRNuIMM";
1583  case ARMISD::VQSHRNsuIMM: return "ARMISD::VQSHRNsuIMM";
1584  case ARMISD::VQRSHRNsIMM: return "ARMISD::VQRSHRNsIMM";
1585  case ARMISD::VQRSHRNuIMM: return "ARMISD::VQRSHRNuIMM";
1586  case ARMISD::VQRSHRNsuIMM: return "ARMISD::VQRSHRNsuIMM";
1587  case ARMISD::VSLIIMM: return "ARMISD::VSLIIMM";
1588  case ARMISD::VSRIIMM: return "ARMISD::VSRIIMM";
1589  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1590  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1591  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1592  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1593  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1594  case ARMISD::VDUP: return "ARMISD::VDUP";
1595  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1596  case ARMISD::VEXT: return "ARMISD::VEXT";
1597  case ARMISD::VREV64: return "ARMISD::VREV64";
1598  case ARMISD::VREV32: return "ARMISD::VREV32";
1599  case ARMISD::VREV16: return "ARMISD::VREV16";
1600  case ARMISD::VZIP: return "ARMISD::VZIP";
1601  case ARMISD::VUZP: return "ARMISD::VUZP";
1602  case ARMISD::VTRN: return "ARMISD::VTRN";
1603  case ARMISD::VTBL1: return "ARMISD::VTBL1";
1604  case ARMISD::VTBL2: return "ARMISD::VTBL2";
1605  case ARMISD::VMULLs: return "ARMISD::VMULLs";
1606  case ARMISD::VMULLu: return "ARMISD::VMULLu";
1607  case ARMISD::UMAAL: return "ARMISD::UMAAL";
1608  case ARMISD::UMLAL: return "ARMISD::UMLAL";
1609  case ARMISD::SMLAL: return "ARMISD::SMLAL";
1610  case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1611  case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1612  case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1613  case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1614  case ARMISD::SMULWB: return "ARMISD::SMULWB";
1615  case ARMISD::SMULWT: return "ARMISD::SMULWT";
1616  case ARMISD::SMLALD: return "ARMISD::SMLALD";
1617  case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1618  case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1619  case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1620  case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
1621  case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
1622  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1623  case ARMISD::BFI: return "ARMISD::BFI";
1624  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1625  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1626  case ARMISD::VBSL: return "ARMISD::VBSL";
1627  case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1628  case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1629  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1630  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1631  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1632  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1633  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1634  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1635  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1636  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1637  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1638  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1639  case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1640  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1641  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1642  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1643  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1644  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1645  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1646  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1647  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1648  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1649  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1650  case ARMISD::WLS: return "ARMISD::WLS";
1651  case ARMISD::LE: return "ARMISD::LE";
1652  case ARMISD::LOOP_DEC: return "ARMISD::LOOP_DEC";
1653  case ARMISD::CSINV: return "ARMISD::CSINV";
1654  case ARMISD::CSNEG: return "ARMISD::CSNEG";
1655  case ARMISD::CSINC: return "ARMISD::CSINC";
1656  }
1657  return nullptr;
1658 }
1659 
1660 EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1661  EVT VT) const {
1662  if (!VT.isVector())
1663  return getPointerTy(DL);
1664 
1665  // MVE has a predicate register.
1666  if (Subtarget->hasMVEIntegerOps() &&
1667  (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8))
1670 }
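// Illustrative consequence (editorial note): with MVE, comparing two 128-bit
// vectors produces a predicate rather than a full vector of lane masks, e.g.
//
//   %c = icmp slt <4 x i32> %a, %b
//
// yields a v4i1 setcc result that lives in the MVE predicate register (VPR)
// and can feed predicated operations directly.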
1671 
1672 /// getRegClassFor - Return the register class that should be used for the
1673 /// specified value type.
1674 const TargetRegisterClass *
1675 ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1676  (void)isDivergent;
1677  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1678  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1679  // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1680  // MVE Q registers.
1681  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1682  if (VT == MVT::v4i64)
1683  return &ARM::QQPRRegClass;
1684  if (VT == MVT::v8i64)
1685  return &ARM::QQQQPRRegClass;
1686  }
1687  return TargetLowering::getRegClassFor(VT);
1688 }
1689 
1690 // memcpy and other memory intrinsics typically try to use LDM/STM if the
1691 // source/dest is aligned and the copy size is large enough. We therefore want
1692 // to align such objects passed to memory intrinsics.
1693 bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1694  unsigned &PrefAlign) const {
1695  if (!isa<MemIntrinsic>(CI))
1696  return false;
1697  MinSize = 8;
1698  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1699  // cycle faster than 4-byte aligned LDM.
1700  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1701  return true;
1702 }
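// Example of the effect (editorial note): for a call such as
//
//   memcpy(dst, src, 32);
//
// this hook asks the IR-level alignment pass to raise suitable dst/src
// objects to 8-byte alignment on v6+ non-M-class cores (4 elsewhere), so the
// LDM/STM expansion of the copy hits the faster aligned case described above.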
1703 
1704 // Create a fast isel object.
1705 FastISel *
1706 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1707  const TargetLibraryInfo *libInfo) const {
1708  return ARM::createFastISel(funcInfo, libInfo);
1709 }
1710 
1711 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1712  unsigned NumVals = N->getNumValues();
1713  if (!NumVals)
1714  return Sched::RegPressure;
1715 
1716  for (unsigned i = 0; i != NumVals; ++i) {
1717  EVT VT = N->getValueType(i);
1718  if (VT == MVT::Glue || VT == MVT::Other)
1719  continue;
1720  if (VT.isFloatingPoint() || VT.isVector())
1721  return Sched::ILP;
1722  }
1723 
1724  if (!N->isMachineOpcode())
1725  return Sched::RegPressure;
1726 
1727  // Loads are scheduled for latency even if the instruction itinerary
1728  // is not available.
1729  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1730  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1731 
1732  if (MCID.getNumDefs() == 0)
1733  return Sched::RegPressure;
1734  if (!Itins->isEmpty() &&
1735  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1736  return Sched::ILP;
1737 
1738  return Sched::RegPressure;
1739 }
1740 
1741 //===----------------------------------------------------------------------===//
1742 // Lowering Code
1743 //===----------------------------------------------------------------------===//
1744 
1745 static bool isSRL16(const SDValue &Op) {
1746  if (Op.getOpcode() != ISD::SRL)
1747  return false;
1748  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1749  return Const->getZExtValue() == 16;
1750  return false;
1751 }
1752 
1753 static bool isSRA16(const SDValue &Op) {
1754  if (Op.getOpcode() != ISD::SRA)
1755  return false;
1756  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1757  return Const->getZExtValue() == 16;
1758  return false;
1759 }
1760 
1761 static bool isSHL16(const SDValue &Op) {
1762  if (Op.getOpcode() != ISD::SHL)
1763  return false;
1764  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1765  return Const->getZExtValue() == 16;
1766  return false;
1767 }
1768 
1769 // Check for a signed 16-bit value. We special-case SRA because it makes
1770 // things simpler when also looking for SRAs that aren't sign-extending a
1771 // smaller value. Without the check, we'd need to take extra care with
1772 // checking order for some operations.
1773 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1774  if (isSRA16(Op))
1775  return isSHL16(Op.getOperand(0));
1776  return DAG.ComputeNumSignBits(Op) == 17;
1777 }
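// Worked example (editorial note): for an i32 value,
//
//   (sra (shl x, 16), 16)
//
// is a sign-extended 16-bit value, which the isSRA16/isSHL16 pair recognizes
// directly. Otherwise, ComputeNumSignBits(Op) == 17 means the top 17 bits are
// all copies of the sign bit, i.e. the value fits in a signed 16-bit range.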
1778 
1779 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
1780 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1781  switch (CC) {
1782  default: llvm_unreachable("Unknown condition code!");
1783  case ISD::SETNE: return ARMCC::NE;
1784  case ISD::SETEQ: return ARMCC::EQ;
1785  case ISD::SETGT: return ARMCC::GT;
1786  case ISD::SETGE: return ARMCC::GE;
1787  case ISD::SETLT: return ARMCC::LT;
1788  case ISD::SETLE: return ARMCC::LE;
1789  case ISD::SETUGT: return ARMCC::HI;
1790  case ISD::SETUGE: return ARMCC::HS;
1791  case ISD::SETULT: return ARMCC::LO;
1792  case ISD::SETULE: return ARMCC::LS;
1793  }
1794 }
1795 
1796 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1797 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1798  ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
1799  CondCode2 = ARMCC::AL;
1800  InvalidOnQNaN = true;
1801  switch (CC) {
1802  default: llvm_unreachable("Unknown FP condition!");
1803  case ISD::SETEQ:
1804  case ISD::SETOEQ:
1805  CondCode = ARMCC::EQ;
1806  InvalidOnQNaN = false;
1807  break;
1808  case ISD::SETGT:
1809  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1810  case ISD::SETGE:
1811  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1812  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1813  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1814  case ISD::SETONE:
1815  CondCode = ARMCC::MI;
1816  CondCode2 = ARMCC::GT;
1817  InvalidOnQNaN = false;
1818  break;
1819  case ISD::SETO: CondCode = ARMCC::VC; break;
1820  case ISD::SETUO: CondCode = ARMCC::VS; break;
1821  case ISD::SETUEQ:
1822  CondCode = ARMCC::EQ;
1823  CondCode2 = ARMCC::VS;
1824  InvalidOnQNaN = false;
1825  break;
1826  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1827  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1828  case ISD::SETLT:
1829  case ISD::SETULT: CondCode = ARMCC::LT; break;
1830  case ISD::SETLE:
1831  case ISD::SETULE: CondCode = ARMCC::LE; break;
1832  case ISD::SETNE:
1833  case ISD::SETUNE:
1834  CondCode = ARMCC::NE;
1835  InvalidOnQNaN = false;
1836  break;
1837  }
1838 }
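// For illustration (roughly; exact codegen depends on the subtarget): an
// ordered not-equal compare (SETONE) needs two ARM conditions, MI and GT, so
// a select on it ends up as something like
//
//   vcmp.f32 s0, s1
//   vmrs     APSR_nzcv, fpscr
//   movmi    r0, #1
//   movgt    r0, #1
//
// whereas single-condition codes (e.g. SETOGT -> GT) need only one
// predicated instruction.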
1839 
1840 //===----------------------------------------------------------------------===//
1841 // Calling Convention Implementation
1842 //===----------------------------------------------------------------------===//
1843 
1844 /// getEffectiveCallingConv - Get the effective calling convention, taking into
1845 /// account presence of floating point hardware and calling convention
1846 /// limitations, such as support for variadic functions.
1847 CallingConv::ID
1848 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1849  bool isVarArg) const {
1850  switch (CC) {
1851  default:
1852  report_fatal_error("Unsupported calling convention");
1853  case CallingConv::ARM_AAPCS:
1854  case CallingConv::ARM_APCS:
1855  case CallingConv::GHC:
1856  return CC;
1857  case CallingConv::PreserveMost:
1858  return CallingConv::PreserveMost;
1859  case CallingConv::ARM_AAPCS_VFP:
1860  case CallingConv::Swift:
1861  return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
1862  case CallingConv::C:
1863  if (!Subtarget->isAAPCS_ABI())
1864  return CallingConv::ARM_APCS;
1865  else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
1866  getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
1867  !isVarArg)
1868  return CallingConv::ARM_AAPCS_VFP;
1869  else
1870  return CallingConv::ARM_AAPCS;
1871  case CallingConv::Fast:
1872  case CallingConv::CXX_FAST_TLS:
1873  if (!Subtarget->isAAPCS_ABI()) {
1874  if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
1875  return CallingConv::Fast;
1876  return CallingConv::ARM_APCS;
1877  } else if (Subtarget->hasVFP2Base() &&
1878  !Subtarget->isThumb1Only() && !isVarArg)
1879  return CallingConv::ARM_AAPCS_VFP;
1880  else
1881  return CallingConv::ARM_AAPCS;
1882  }
1883 }
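// For illustration (assuming an AAPCS target with VFP2 and a hard-float ABI,
// compiled for ARM/Thumb2): a plain C call maps to CallingConv::ARM_AAPCS_VFP,
// so FP arguments travel in s/d registers; the same call made variadic falls
// back to CallingConv::ARM_AAPCS and passes FP values in r0-r3 or on the stack.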
1884 
1885 CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1886  bool isVarArg) const {
1887  return CCAssignFnForNode(CC, false, isVarArg);
1888 }
1889 
1890 CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
1891  bool isVarArg) const {
1892  return CCAssignFnForNode(CC, true, isVarArg);
1893 }
1894 
1895 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1896 /// CallingConvention.
1897 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1898  bool Return,
1899  bool isVarArg) const {
1900  switch (getEffectiveCallingConv(CC, isVarArg)) {
1901  default:
1902  report_fatal_error("Unsupported calling convention");
1903  case CallingConv::ARM_APCS:
1904  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1905  case CallingConv::ARM_AAPCS:
1906  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1907  case CallingConv::ARM_AAPCS_VFP:
1908  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1909  case CallingConv::Fast:
1910  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1911  case CallingConv::GHC:
1912  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1913  case CallingConv::PreserveMost:
1914  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1915  }
1916 }
1917 
1918 /// LowerCallResult - Lower the result values of a call into the
1919 /// appropriate copies out of appropriate physical registers.
1920 SDValue ARMTargetLowering::LowerCallResult(
1921  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1922  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1923  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1924  SDValue ThisVal) const {
1925  // Assign locations to each value returned by this call.
1926  SmallVector<CCValAssign, 16> RVLocs;
1927  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1928  *DAG.getContext());
1929  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1930 
1931  // Copy all of the result registers out of their specified physreg.
1932  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1933  CCValAssign VA = RVLocs[i];
1934 
1935  // Pass 'this' value directly from the argument to return value, to avoid
1936  // reg unit interference
1937  if (i == 0 && isThisReturn) {
1938  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1939  "unexpected return calling convention register assignment");
1940  InVals.push_back(ThisVal);
1941  continue;
1942  }
1943 
1944  SDValue Val;
1945  if (VA.needsCustom()) {
1946  // Handle f64 or half of a v2f64.
1947  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1948  InFlag);
1949  Chain = Lo.getValue(1);
1950  InFlag = Lo.getValue(2);
1951  VA = RVLocs[++i]; // skip ahead to next loc
1952  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1953  InFlag);
1954  Chain = Hi.getValue(1);
1955  InFlag = Hi.getValue(2);
1956  if (!Subtarget->isLittle())
1957  std::swap (Lo, Hi);
1958  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1959 
1960  if (VA.getLocVT() == MVT::v2f64) {
1961  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1962  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1963  DAG.getConstant(0, dl, MVT::i32));
1964 
1965  VA = RVLocs[++i]; // skip ahead to next loc
1966  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1967  Chain = Lo.getValue(1);
1968  InFlag = Lo.getValue(2);
1969  VA = RVLocs[++i]; // skip ahead to next loc
1970  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1971  Chain = Hi.getValue(1);
1972  InFlag = Hi.getValue(2);
1973  if (!Subtarget->isLittle())
1974  std::swap (Lo, Hi);
1975  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1976  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1977  DAG.getConstant(1, dl, MVT::i32));
1978  }
1979  } else {
1980  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1981  InFlag);
1982  Chain = Val.getValue(1);
1983  InFlag = Val.getValue(2);
1984  }
1985 
1986  switch (VA.getLocInfo()) {
1987  default: llvm_unreachable("Unknown loc info!");
1988  case CCValAssign::Full: break;
1989  case CCValAssign::BCvt:
1990  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1991  break;
1992  }
1993 
1994  InVals.push_back(Val);
1995  }
1996 
1997  return Chain;
1998 }
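// For illustration (roughly, for a soft-float AAPCS call returning f64): the
// callee leaves the two halves in r0/r1, so the code above emits two
// CopyFromReg nodes glued to the call and recombines them as
//
//   t1: i32 = CopyFromReg r0
//   t2: i32 = CopyFromReg r1
//   t3: f64 = ARMISD::VMOVDRR t1, t2   (halves swapped on big-endian)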
1999 
2000 /// LowerMemOpCallTo - Store the argument to the stack.
2001 SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
2002  SDValue Arg, const SDLoc &dl,
2003  SelectionDAG &DAG,
2004  const CCValAssign &VA,
2005  ISD::ArgFlagsTy Flags) const {
2006  unsigned LocMemOffset = VA.getLocMemOffset();
2007  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
2008  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2009  StackPtr, PtrOff);
2010  return DAG.getStore(
2011  Chain, dl, Arg, PtrOff,
2012  MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
2013 }
2014 
2015 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2016  SDValue Chain, SDValue &Arg,
2017  RegsToPassVector &RegsToPass,
2018  CCValAssign &VA, CCValAssign &NextVA,
2019  SDValue &StackPtr,
2020  SmallVectorImpl<SDValue> &MemOpChains,
2021  ISD::ArgFlagsTy Flags) const {
2022  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2023  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2024  unsigned id = Subtarget->isLittle() ? 0 : 1;
2025  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2026 
2027  if (NextVA.isRegLoc())
2028  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2029  else {
2030  assert(NextVA.isMemLoc());
2031  if (!StackPtr.getNode())
2032  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2033  getPointerTy(DAG.getDataLayout()));
2034 
2035  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
2036  dl, DAG, NextVA,
2037  Flags));
2038  }
2039 }
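// For illustration (roughly): an f64 argument assigned to GPRs is first
// cracked with
//
//   t1,t2: i32,i32 = ARMISD::VMOVRRD f64:Arg
//
// and the two i32 halves are then either attached to RegsToPass (e.g. r2 and
// r3) or, when only one register is left, the second half is stored to the
// outgoing argument area via LowerMemOpCallTo.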
2040 
2041 /// LowerCall - Lowering a call into a callseq_start <-
2042 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
2043 /// nodes.
2044 SDValue
2045 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2046  SmallVectorImpl<SDValue> &InVals) const {
2047  SelectionDAG &DAG = CLI.DAG;
2048  SDLoc &dl = CLI.DL;
2049  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2050  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2051  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2052  SDValue Chain = CLI.Chain;
2053  SDValue Callee = CLI.Callee;
2054  bool &isTailCall = CLI.IsTailCall;
2055  CallingConv::ID CallConv = CLI.CallConv;
2056  bool doesNotRet = CLI.DoesNotReturn;
2057  bool isVarArg = CLI.IsVarArg;
2058 
2059  MachineFunction &MF = DAG.getMachineFunction();
2060  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2061  bool isThisReturn = false;
2062  auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
2063  bool PreferIndirect = false;
2064 
2065  // Disable tail calls if they're not supported.
2066  if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
2067  isTailCall = false;
2068 
2069  if (isa<GlobalAddressSDNode>(Callee)) {
2070  // If we're optimizing for minimum size and the function is called three or
2071  // more times in this block, we can improve codesize by calling indirectly
2072  // as BLXr has a 16-bit encoding.
2073  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2074  if (CLI.CS) {
2075  auto *BB = CLI.CS.getParent();
2076  PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2077  count_if(GV->users(), [&BB](const User *U) {
2078  return isa<Instruction>(U) &&
2079  cast<Instruction>(U)->getParent() == BB;
2080  }) > 2;
2081  }
2082  }
2083  if (isTailCall) {
2084  // Check if it's really possible to do a tail call.
2085  isTailCall = IsEligibleForTailCallOptimization(
2086  Callee, CallConv, isVarArg, isStructRet,
2087  MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2088  PreferIndirect);
2089  if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
2090  report_fatal_error("failed to perform tail call elimination on a call "
2091  "site marked musttail");
2092  // We don't support GuaranteedTailCallOpt for ARM, only automatically
2093  // detected sibcalls.
2094  if (isTailCall)
2095  ++NumTailCalls;
2096  }
2097 
2098  // Analyze operands of the call, assigning locations to each operand.
2099  SmallVector<CCValAssign, 16> ArgLocs;
2100  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2101  *DAG.getContext());
2102  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2103 
2104  // Get a count of how many bytes are to be pushed on the stack.
2105  unsigned NumBytes = CCInfo.getNextStackOffset();
2106 
2107  if (isTailCall) {
2108  // For tail calls, memory operands are available in our caller's stack.
2109  NumBytes = 0;
2110  } else {
2111  // Adjust the stack pointer for the new arguments...
2112  // These operations are automatically eliminated by the prolog/epilog pass
2113  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
2114  }
2115 
2116  SDValue StackPtr =
2117  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2118 
2119  RegsToPassVector RegsToPass;
2120  SmallVector<SDValue, 8> MemOpChains;
2121 
2122  // Walk the register/memloc assignments, inserting copies/loads. In the case
2123  // of tail call optimization, arguments are handled later.
2124  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2125  i != e;
2126  ++i, ++realArgIdx) {
2127  CCValAssign &VA = ArgLocs[i];
2128  SDValue Arg = OutVals[realArgIdx];
2129  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2130  bool isByVal = Flags.isByVal();
2131 
2132  // Promote the value if needed.
2133  switch (VA.getLocInfo()) {
2134  default: llvm_unreachable("Unknown loc info!");
2135  case CCValAssign::Full: break;
2136  case CCValAssign::SExt:
2137  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2138  break;
2139  case CCValAssign::ZExt:
2140  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2141  break;
2142  case CCValAssign::AExt:
2143  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2144  break;
2145  case CCValAssign::BCvt:
2146  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2147  break;
2148  }
2149 
2150  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2151  if (VA.needsCustom()) {
2152  if (VA.getLocVT() == MVT::v2f64) {
2153  SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2154  DAG.getConstant(0, dl, MVT::i32));
2155  SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2156  DAG.getConstant(1, dl, MVT::i32));
2157 
2158  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
2159  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
2160 
2161  VA = ArgLocs[++i]; // skip ahead to next loc
2162  if (VA.isRegLoc()) {
2163  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
2164  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
2165  } else {
2166  assert(VA.isMemLoc());
2167 
2168  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
2169  dl, DAG, VA, Flags));
2170  }
2171  } else {
2172  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2173  StackPtr, MemOpChains, Flags);
2174  }
2175  } else if (VA.isRegLoc()) {
2176  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2177  Outs[0].VT == MVT::i32) {
2178  assert(VA.getLocVT() == MVT::i32 &&
2179  "unexpected calling convention register assignment");
2180  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2181  "unexpected use of 'returned'");
2182  isThisReturn = true;
2183  }
2184  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2185  } else if (isByVal) {
2186  assert(VA.isMemLoc());
2187  unsigned offset = 0;
2188 
2189  // True if this byval aggregate will be split between registers
2190  // and memory.
2191  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2192  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2193 
2194  if (CurByValIdx < ByValArgsCount) {
2195 
2196  unsigned RegBegin, RegEnd;
2197  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2198 
2199  EVT PtrVT =
2200  DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2201  unsigned int i, j;
2202  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2203  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2204  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2205  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
2206  MachinePointerInfo(),
2207  DAG.InferPtrAlignment(AddArg));
2208  MemOpChains.push_back(Load.getValue(1));
2209  RegsToPass.push_back(std::make_pair(j, Load));
2210  }
2211 
2212  // If the parameter size lies outside the register area, the "offset" value
2213  // helps us to calculate the stack slot for the remaining part properly.
2214  offset = RegEnd - RegBegin;
2215 
2216  CCInfo.nextInRegsParam();
2217  }
2218 
2219  if (Flags.getByValSize() > 4*offset) {
2220  auto PtrVT = getPointerTy(DAG.getDataLayout());
2221  unsigned LocMemOffset = VA.getLocMemOffset();
2222  SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
2223  SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
2224  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2225  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2226  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2227  MVT::i32);
2228  SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
2229  MVT::i32);
2230 
2231  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2232  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2233  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2234  Ops));
2235  }
2236  } else if (!isTailCall) {
2237  assert(VA.isMemLoc());
2238 
2239  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2240  dl, DAG, VA, Flags));
2241  }
2242  }
2243 
2244  if (!MemOpChains.empty())
2245  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2246 
2247  // Build a sequence of copy-to-reg nodes chained together with token chain
2248  // and flag operands which copy the outgoing args into the appropriate regs.
2249  SDValue InFlag;
2250  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2251  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2252  RegsToPass[i].second, InFlag);
2253  InFlag = Chain.getValue(1);
2254  }
2255 
2256  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2257  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2258  // node so that legalize doesn't hack it.
2259  bool isDirect = false;
2260 
2261  const TargetMachine &TM = getTargetMachine();
2262  const Module *Mod = MF.getFunction().getParent();
2263  const GlobalValue *GV = nullptr;
2264  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2265  GV = G->getGlobal();
2266  bool isStub =
2267  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2268 
2269  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2270  bool isLocalARMFunc = false;
2271  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2272  auto PtrVt = getPointerTy(DAG.getDataLayout());
2273 
2274  if (Subtarget->genLongCalls()) {
2275  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2276  "long-calls codegen is not position independent!");
2277  // Handle a global address or an external symbol. If it's not one of
2278  // those, the target's already in a register, so we don't need to do
2279  // anything extra.
2280  if (isa<GlobalAddressSDNode>(Callee)) {
2281  // Create a constant pool entry for the callee address
2282  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2283  ARMConstantPoolValue *CPV =
2284  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2285 
2286  // Get the address of the callee into a register
2287  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2288  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2289  Callee = DAG.getLoad(
2290  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2291  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2292  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2293  const char *Sym = S->getSymbol();
2294 
2295  // Create a constant pool entry for the callee address
2296  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2297  ARMConstantPoolValue *CPV =
2298  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2299  ARMPCLabelIndex, 0);
2300  // Get the address of the callee into a register
2301  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2302  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2303  Callee = DAG.getLoad(
2304  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2305  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2306  }
2307  } else if (isa<GlobalAddressSDNode>(Callee)) {
2308  if (!PreferIndirect) {
2309  isDirect = true;
2310  bool isDef = GV->isStrongDefinitionForLinker();
2311 
2312  // ARM call to a local ARM function is predicable.
2313  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2314  // tBX takes a register source operand.
2315  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2316  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2317  Callee = DAG.getNode(
2318  ARMISD::WrapperPIC, dl, PtrVt,
2319  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2320  Callee = DAG.getLoad(
2321  PtrVt, dl, DAG.getEntryNode(), Callee,
2322  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2323  /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2324  MachineMemOperand::MOInvariant);
2325  } else if (Subtarget->isTargetCOFF()) {
2326  assert(Subtarget->isTargetWindows() &&
2327  "Windows is the only supported COFF target");
2328  unsigned TargetFlags = GV->hasDLLImportStorageClass()
2329  ? ARMII::MO_DLLIMPORT
2330  : ARMII::MO_NO_FLAG;
2331  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
2332  TargetFlags);
2333  if (GV->hasDLLImportStorageClass())
2334  Callee =
2335  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2336  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2337  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2338  } else {
2339  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2340  }
2341  }
2342  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2343  isDirect = true;
2344  // tBX takes a register source operand.
2345  const char *Sym = S->getSymbol();
2346  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2347  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2348  ARMConstantPoolValue *CPV =
2349  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2350  ARMPCLabelIndex, 4);
2351  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2352  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2353  Callee = DAG.getLoad(
2354  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2355  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2356  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2357  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2358  } else {
2359  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2360  }
2361  }
2362 
2363  // FIXME: handle tail calls differently.
2364  unsigned CallOpc;
2365  if (Subtarget->isThumb()) {
2366  if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2367  CallOpc = ARMISD::CALL_NOLINK;
2368  else
2369  CallOpc = ARMISD::CALL;
2370  } else {
2371  if (!isDirect && !Subtarget->hasV5TOps())
2372  CallOpc = ARMISD::CALL_NOLINK;
2373  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2374  // Emit regular call when code size is the priority
2375  !Subtarget->hasMinSize())
2376  // "mov lr, pc; b _foo" to avoid confusing the return stack predictor (RSP)
2377  CallOpc = ARMISD::CALL_NOLINK;
2378  else
2379  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2380  }
2381 
2382  std::vector<SDValue> Ops;
2383  Ops.push_back(Chain);
2384  Ops.push_back(Callee);
2385 
2386  // Add argument registers to the end of the list so that they are known live
2387  // into the call.
2388  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2389  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2390  RegsToPass[i].second.getValueType()));
2391 
2392  // Add a register mask operand representing the call-preserved registers.
2393  if (!isTailCall) {
2394  const uint32_t *Mask;
2395  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2396  if (isThisReturn) {
2397  // For 'this' returns, use the R0-preserving mask if applicable
2398  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2399  if (!Mask) {
2400  // Set isThisReturn to false if the calling convention is not one that
2401  // allows 'returned' to be modeled in this way, so LowerCallResult does
2402  // not try to pass 'this' straight through
2403  isThisReturn = false;
2404  Mask = ARI->getCallPreservedMask(MF, CallConv);
2405  }
2406  } else
2407  Mask = ARI->getCallPreservedMask(MF, CallConv);
2408 
2409  assert(Mask && "Missing call preserved mask for calling convention");
2410  Ops.push_back(DAG.getRegisterMask(Mask));
2411  }
2412 
2413  if (InFlag.getNode())
2414  Ops.push_back(InFlag);
2415 
2416  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2417  if (isTailCall) {
2418  MF.getFrameInfo().setHasTailCall();
2419  return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2420  }
2421 
2422  // Returns a chain and a flag for retval copy to use.
2423  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2424  InFlag = Chain.getValue(1);
2425 
2426  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2427  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2428  if (!Ins.empty())
2429  InFlag = Chain.getValue(1);
2430 
2431  // Handle result values, copying them out of physregs into vregs that we
2432  // return.
2433  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2434  InVals, isThisReturn,
2435  isThisReturn ? OutVals[0] : SDValue());
2436 }
2437 
2438 /// HandleByVal - Every parameter *after* a byval parameter is passed
2439 /// on the stack. Remember the next parameter register to allocate,
2440 /// and then confiscate the rest of the parameter registers to ensure
2441 /// this.
2442 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2443  unsigned Align) const {
2444  // Byval (as with any stack) slots are always at least 4 byte aligned.
2445  Align = std::max(Align, 4U);
2446 
2447  unsigned Reg = State->AllocateReg(GPRArgRegs);
2448  if (!Reg)
2449  return;
2450 
2451  unsigned AlignInRegs = Align / 4;
2452  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2453  for (unsigned i = 0; i < Waste; ++i)
2454  Reg = State->AllocateReg(GPRArgRegs);
2455 
2456  if (!Reg)
2457  return;
2458 
2459  unsigned Excess = 4 * (ARM::R4 - Reg);
2460 
2461  // Special case when NSAA != SP and the parameter size is greater than the
2462  // size of all remaining GPR regs. In that case we can't split the parameter,
2463  // we must send it to the stack. We also must set NCRN to R4, so that all
2464  // remaining registers are wasted.
2465  const unsigned NSAAOffset = State->getNextStackOffset();
2466  if (NSAAOffset != 0 && Size > Excess) {
2467  while (State->AllocateReg(GPRArgRegs))
2468  ;
2469  return;
2470  }
2471 
2472  // The first register for the byval parameter is the first register that
2473  // wasn't allocated before this method call, so it would be "reg".
2474  // If the parameter is small enough to fit in the range [reg, r4), then
2475  // the end (one past the last) register would be reg + param-size-in-regs;
2476  // otherwise the parameter is split between registers and the stack,
2477  // and the end register would be r4 in that case.
2478  unsigned ByValRegBegin = Reg;
2479  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2480  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2481  // Note, the first register was already allocated at the beginning of this
2482  // function; allocate the remaining registers we need.
2483  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2484  State->AllocateReg(GPRArgRegs);
2485  // A byval parameter that is split between registers and memory needs its
2486  // size truncated here.
2487  // In the case where the entire structure fits in registers, we set the
2488  // size in memory to zero.
2489  Size = std::max<int>(Size - Excess, 0);
2490 }
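// Worked example (roughly, assuming no stack arguments yet): a byval of 16
// bytes with 8-byte alignment, arriving when r1 is the next free GPR, first
// wastes r1 to reach an even register, records [r2, r4) as the in-register
// part (8 bytes in r2/r3), and leaves Size = 8 so the remaining half of the
// aggregate is placed on the stack by the caller.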
2491 
2492 /// MatchingStackOffset - Return true if the given stack call argument is
2493 /// already available in the same position (relatively) of the caller's
2494 /// incoming argument stack.
2495 static
2496 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2497  MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2498  const TargetInstrInfo *TII) {
2499  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2500  int FI = std::numeric_limits<int>::max();
2501  if (Arg.getOpcode() == ISD::CopyFromReg) {
2502  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2503  if (!Register::isVirtualRegister(VR))
2504  return false;
2505  MachineInstr *Def = MRI->getVRegDef(VR);
2506  if (!Def)
2507  return false;
2508  if (!Flags.isByVal()) {
2509  if (!TII->isLoadFromStackSlot(*Def, FI))
2510  return false;
2511  } else {
2512  return false;
2513  }
2514  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2515  if (Flags.isByVal())
2516  // ByVal argument is passed in as a pointer but it's now being
2517  // dereferenced. e.g.
2518  // define @foo(%struct.X* %A) {
2519  // tail call @bar(%struct.X* byval %A)
2520  // }
2521  return false;
2522  SDValue Ptr = Ld->getBasePtr();
2523  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2524  if (!FINode)
2525  return false;
2526  FI = FINode->getIndex();
2527  } else
2528  return false;
2529 
2530  assert(FI != std::numeric_limits<int>::max());
2531  if (!MFI.isFixedObjectIndex(FI))
2532  return false;
2533  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2534 }
2535 
2536 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2537 /// for tail call optimization. Targets which want to do tail call
2538 /// optimization should implement this function.
2539 bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2540  SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2541  bool isCalleeStructRet, bool isCallerStructRet,
2542  const SmallVectorImpl<ISD::OutputArg> &Outs,
2543  const SmallVectorImpl<SDValue> &OutVals,
2544  const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
2545  const bool isIndirect) const {
2546  MachineFunction &MF = DAG.getMachineFunction();
2547  const Function &CallerF = MF.getFunction();
2548  CallingConv::ID CallerCC = CallerF.getCallingConv();
2549 
2550  assert(Subtarget->supportsTailCall());
2551 
2552  // Indirect tail calls cannot be optimized for Thumb1 if the args
2553  // to the call take up r0-r3. The reason is that there are no legal registers
2554  // left to hold the pointer to the function to be called.
2555  if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2556  (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
2557  return false;
2558 
2559  // Look for obvious safe cases to perform tail call optimization that do not
2560  // require ABI changes. This is what gcc calls sibcall.
2561 
2562  // Exception-handling functions need a special set of instructions to indicate
2563  // a return to the hardware. Tail-calling another function would probably
2564  // break this.
2565  if (CallerF.hasFnAttribute("interrupt"))
2566  return false;
2567 
2568  // Also avoid sibcall optimization if either caller or callee uses struct
2569  // return semantics.
2570  if (isCalleeStructRet || isCallerStructRet)
2571  return false;
2572 
2573  // Externally-defined functions with weak linkage should not be
2574  // tail-called on ARM when the OS does not support dynamic
2575  // pre-emption of symbols, as the AAELF spec requires normal calls
2576  // to undefined weak functions to be replaced with a NOP or jump to the
2577  // next instruction. The behaviour of branch instructions in this
2578  // situation (as used for tail calls) is implementation-defined, so we
2579  // cannot rely on the linker replacing the tail call with a return.
2580  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2581  const GlobalValue *GV = G->getGlobal();
2582  const Triple &TT = getTargetMachine().getTargetTriple();
2583  if (GV->hasExternalWeakLinkage() &&
2584  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2585  return false;
2586  }
2587 
2588  // Check that the call results are passed in the same way.
2589  LLVMContext &C = *DAG.getContext();
2590  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2591  CCAssignFnForReturn(CalleeCC, isVarArg),
2592  CCAssignFnForReturn(CallerCC, isVarArg)))
2593  return false;
2594  // The callee has to preserve all registers the caller needs to preserve.
2595  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2596  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2597  if (CalleeCC != CallerCC) {
2598  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2599  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2600  return false;
2601  }
2602 
2603  // If Caller's vararg or byval argument has been split between registers and
2604  // stack, do not perform tail call, since part of the argument is in caller's
2605  // local frame.
2606  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2607  if (AFI_Caller->getArgRegsSaveSize())
2608  return false;
2609 
2610  // If the callee takes no arguments then go on to check the results of the
2611  // call.
2612  if (!Outs.empty()) {
2613  // Check if stack adjustment is needed. For now, do not do this if any
2614  // argument is passed on the stack.
2615  SmallVector<CCValAssign, 16> ArgLocs;
2616  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2617  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2618  if (CCInfo.getNextStackOffset()) {
2619  // Check if the arguments are already laid out in the right way as
2620  // the caller's fixed stack objects.
2621  MachineFrameInfo &MFI = MF.getFrameInfo();
2622  const MachineRegisterInfo *MRI = &MF.getRegInfo();
2623  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2624  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2625  i != e;
2626  ++i, ++realArgIdx) {
2627  CCValAssign &VA = ArgLocs[i];
2628  EVT RegVT = VA.getLocVT();
2629  SDValue Arg = OutVals[realArgIdx];
2630  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2631  if (VA.getLocInfo() == CCValAssign::Indirect)
2632  return false;
2633  if (VA.needsCustom()) {
2634  // f64 and vector types are split into multiple registers or
2635  // register/stack-slot combinations. The types will not match
2636  // the registers; give up on memory f64 refs until we figure
2637  // out what to do about this.
2638  if (!VA.isRegLoc())
2639  return false;
2640  if (!ArgLocs[++i].isRegLoc())
2641  return false;
2642  if (RegVT == MVT::v2f64) {
2643  if (!ArgLocs[++i].isRegLoc())
2644  return false;
2645  if (!ArgLocs[++i].isRegLoc())
2646  return false;
2647  }
2648  } else if (!VA.isRegLoc()) {
2649  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2650  MFI, MRI, TII))
2651  return false;
2652  }
2653  }
2654  }
2655 
2656  const MachineRegisterInfo &MRI = MF.getRegInfo();
2657  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2658  return false;
2659  }
2660 
2661  return true;
2662 }
2663 
2664 bool
2665 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2666  MachineFunction &MF, bool isVarArg,
2667  const SmallVectorImpl<ISD::OutputArg> &Outs,
2668  LLVMContext &Context) const {
2669  SmallVector<CCValAssign, 16> RVLocs;
2670  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2671  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2672 }
2673 
2674 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2675  const SDLoc &DL, SelectionDAG &DAG) {
2676  const MachineFunction &MF = DAG.getMachineFunction();
2677  const Function &F = MF.getFunction();
2678 
2679  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
2680 
2681  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2682  // version of the "preferred return address". These offsets affect the return
2683  // instruction if this is a return from PL1 without hypervisor extensions.
2684  // IRQ/FIQ: +4 "subs pc, lr, #4"
2685  // SWI: 0 "subs pc, lr, #0"
2686  // ABORT: +4 "subs pc, lr, #4"
2687  // UNDEF: +4/+2 "subs pc, lr, #0"
2688  // UNDEF varies depending on where the exception came from ARM or Thumb
2689  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2690 
2691  int64_t LROffset;
2692  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2693  IntKind == "ABORT")
2694  LROffset = 4;
2695  else if (IntKind == "SWI" || IntKind == "UNDEF")
2696  LROffset = 0;
2697  else
2698  report_fatal_error("Unsupported interrupt attribute. If present, value "
2699  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2700 
2701  RetOps.insert(RetOps.begin() + 1,
2702  DAG.getConstant(LROffset, DL, MVT::i32, false));
2703 
2704  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2705 }
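// For illustration: a handler written roughly as
//
//   define void @isr() "interrupt"="IRQ" { ... ret void }
//
// returns with "subs pc, lr, #4" (LROffset == 4), while an SWI/UNDEF handler
// returns with "subs pc, lr, #0".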
2706 
2707 SDValue
2708 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2709  bool isVarArg,
2710  const SmallVectorImpl<ISD::OutputArg> &Outs,
2711  const SmallVectorImpl<SDValue> &OutVals,
2712  const SDLoc &dl, SelectionDAG &DAG) const {
2713  // CCValAssign - represent the assignment of the return value to a location.
2714  SmallVector<CCValAssign, 16> RVLocs;
2715 
2716  // CCState - Info about the registers and stack slots.
2717  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2718  *DAG.getContext());
2719 
2720  // Analyze outgoing return values.
2721  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2722 
2723  SDValue Flag;
2724  SmallVector<SDValue, 4> RetOps;
2725  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2726  bool isLittleEndian = Subtarget->isLittle();
2727 
2728  MachineFunction &MF = DAG.getMachineFunction();
2729  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2730  AFI->setReturnRegsCount(RVLocs.size());
2731 
2732  // Copy the result values into the output registers.
2733  for (unsigned i = 0, realRVLocIdx = 0;
2734  i != RVLocs.size();
2735  ++i, ++realRVLocIdx) {
2736  CCValAssign &VA = RVLocs[i];
2737  assert(VA.isRegLoc() && "Can only return in registers!");
2738 
2739  SDValue Arg = OutVals[realRVLocIdx];
2740  bool ReturnF16 = false;
2741 
2742  if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
2743  // Half-precision return values can be returned like this:
2744  //
2745  // t11 f16 = fadd ...
2746  // t12: i16 = bitcast t11
2747  // t13: i32 = zero_extend t12
2748  // t14: f32 = bitcast t13 <~~~~~~~ Arg
2749  //
2750  // to avoid code generation for bitcasts, we simply set Arg to the node
2751  // that produces the f16 value, t11 in this case.
2752  //
2753  if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
2754  SDValue ZE = Arg.getOperand(0);
2755  if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
2756  SDValue BC = ZE.getOperand(0);
2757  if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
2758  Arg = BC.getOperand(0);
2759  ReturnF16 = true;
2760  }
2761  }
2762  }
2763  }
2764 
2765  switch (VA.getLocInfo()) {
2766  default: llvm_unreachable("Unknown loc info!");
2767  case CCValAssign::Full: break;
2768  case CCValAssign::BCvt:
2769  if (!ReturnF16)
2770  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2771  break;
2772  }
2773 
2774  if (VA.needsCustom()) {
2775  if (VA.getLocVT() == MVT::v2f64) {
2776  // Extract the first half and return it in two registers.
2777  SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2778  DAG.getConstant(0, dl, MVT::i32));
2779  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2780  DAG.getVTList(MVT::i32, MVT::i32), Half);
2781 
2782  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2783  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2784  Flag);
2785  Flag = Chain.getValue(1);
2786  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2787  VA = RVLocs[++i]; // skip ahead to next loc
2788  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2789  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2790  Flag);
2791  Flag = Chain.getValue(1);
2792  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2793  VA = RVLocs[++i]; // skip ahead to next loc
2794 
2795  // Extract the 2nd half and fall through to handle it as an f64 value.
2796  Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2797  DAG.getConstant(1, dl, MVT::i32));
2798  }
2799  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2800  // available.
2801  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2802  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2803  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2804  fmrrd.getValue(isLittleEndian ? 0 : 1),
2805  Flag);
2806  Flag = Chain.getValue(1);
2807  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2808  VA = RVLocs[++i]; // skip ahead to next loc
2809  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2810  fmrrd.getValue(isLittleEndian ? 1 : 0),
2811  Flag);
2812  } else
2813  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2814 
2815  // Guarantee that all emitted copies are
2816  // stuck together, avoiding something bad.
2817  Flag = Chain.getValue(1);
2818  RetOps.push_back(DAG.getRegister(VA.getLocReg(),
2819  ReturnF16 ? MVT::f16 : VA.getLocVT()));
2820  }
2821  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2822  const MCPhysReg *I =
2823  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2824  if (I) {
2825  for (; *I; ++I) {
2826  if (ARM::GPRRegClass.contains(*I))
2827  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2828  else if (ARM::DPRRegClass.contains(*I))
2829  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2830  else
2831  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2832  }
2833  }
2834 
2835  // Update chain and glue.
2836  RetOps[0] = Chain;
2837  if (Flag.getNode())
2838  RetOps.push_back(Flag);
2839 
2840  // CPUs which aren't M-class use a special sequence to return from
2841  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2842  // though we use "subs pc, lr, #N").
2843  //
2844  // M-class CPUs actually use a normal return sequence with a special
2845  // (hardware-provided) value in LR, so the normal code path works.
2846  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
2847  !Subtarget->isMClass()) {
2848  if (Subtarget->isThumb1Only())
2849  report_fatal_error("interrupt attribute is not supported in Thumb1");
2850  return LowerInterruptReturn(RetOps, dl, DAG);
2851  }
2852 
2853  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2854 }
2855 
2856 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2857  if (N->getNumValues() != 1)
2858  return false;
2859  if (!N->hasNUsesOfValue(1, 0))
2860  return false;
2861 
2862  SDValue TCChain = Chain;
2863  SDNode *Copy = *N->use_begin();
2864  if (Copy->getOpcode() == ISD::CopyToReg) {
2865  // If the copy has a glue operand, we conservatively assume it isn't safe to
2866  // perform a tail call.
2867  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2868  return false;
2869  TCChain = Copy->getOperand(0);
2870  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2871  SDNode *VMov = Copy;
2872  // f64 returned in a pair of GPRs.
2873  SmallPtrSet<SDNode*, 2> Copies;
2874  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2875  UI != UE; ++UI) {
2876  if (UI->getOpcode() != ISD::CopyToReg)
2877  return false;
2878  Copies.insert(*UI);
2879  }
2880  if (Copies.size() > 2)
2881  return false;
2882 
2883  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2884  UI != UE; ++UI) {
2885  SDValue UseChain = UI->getOperand(0);
2886  if (Copies.count(UseChain.getNode()))
2887  // Second CopyToReg
2888  Copy = *UI;
2889  else {
2890  // We are at the top of this chain.
2891  // If the copy has a glue operand, we conservatively assume it
2892  // isn't safe to perform a tail call.
2893  if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2894  return false;
2895  // First CopyToReg
2896  TCChain = UseChain;
2897  }
2898  }
2899  } else if (Copy->getOpcode() == ISD::BITCAST) {
2900  // f32 returned in a single GPR.
2901  if (!Copy->hasOneUse())
2902  return false;
2903  Copy = *Copy->use_begin();
2904  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2905  return false;
2906  // If the copy has a glue operand, we conservatively assume it isn't safe to
2907  // perform a tail call.
2908  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2909  return false;
2910  TCChain = Copy->getOperand(0);
2911  } else {
2912  return false;
2913  }
2914 
2915  bool HasRet = false;
2916  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2917  UI != UE; ++UI) {
2918  if (UI->getOpcode() != ARMISD::RET_FLAG &&
2919  UI->getOpcode() != ARMISD::INTRET_FLAG)
2920  return false;
2921  HasRet = true;
2922  }
2923 
2924  if (!HasRet)
2925  return false;
2926 
2927  Chain = TCChain;
2928  return true;
2929 }
2930 
2931 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2932  if (!Subtarget->supportsTailCall())
2933  return false;
2934 
2935  auto Attr =
2936  CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2937  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2938  return false;
2939 
2940  return true;
2941 }
2942 
2943 // Trying to write a 64-bit value, so we need to split it into two 32-bit
2944 // values first, and pass the low and high parts through.
2945 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2946  SDLoc DL(Op);
2947  SDValue WriteValue = Op->getOperand(2);
2948 
2949  // This function is only supposed to be called for i64 type argument.
2950  assert(WriteValue.getValueType() == MVT::i64
2951  && "LowerWRITE_REGISTER called for non-i64 type argument.");
2952 
2953  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2954  DAG.getConstant(0, DL, MVT::i32));
2955  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2956  DAG.getConstant(1, DL, MVT::i32));
2957  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2958  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2959 }
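// For illustration (the register name below is a placeholder): a call such as
//
//   call void @llvm.write_register.i64(metadata !"regname", i64 %v)
//
// becomes a WRITE_REGISTER node whose i64 value operand is replaced here by
// the two i32 halves extracted above, since no ARM GPR can hold 64 bits.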
2960 
2961 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2962 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2963 // one of the above mentioned nodes. It has to be wrapped because otherwise
2964 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2965 // be used to form addressing mode. These wrapped nodes will be selected
2966 // into MOVi.
2967 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
2968  SelectionDAG &DAG) const {
2969  EVT PtrVT = Op.getValueType();
2970  // FIXME there is no actual debug info here
2971  SDLoc dl(Op);
2972  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2973  SDValue Res;
2974 
2975  // When generating execute-only code Constant Pools must be promoted to the
2976  // global data section. It's a bit ugly that we can't share them across basic
2977  // blocks, but this way we guarantee that execute-only behaves correctly with
2978  // position-independent addressing modes.
2979  if (Subtarget->genExecuteOnly()) {
2980  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2981  auto T = const_cast<Type*>(CP->getType());
2982  auto C = const_cast<Constant*>(CP->getConstVal());
2983  auto M = const_cast<Module*>(DAG.getMachineFunction().
2984  getFunction().getParent());
2985  auto GV = new GlobalVariable(
2986  *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
2987  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
2988  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
2989  Twine(AFI->createPICLabelUId())
2990  );
2991  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
2992  dl, PtrVT);
2993  return LowerGlobalAddress(GA, DAG);
2994  }
2995 
2996  if (CP->isMachineConstantPoolEntry())
2997  Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2998  CP->getAlignment());
2999  else
3000  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
3001  CP->getAlignment());
3002  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3003 }
3004 
3005 unsigned ARMTargetLowering::getJumpTableEncoding() const {
3006  return MachineJumpTableInfo::EK_Inline;
3007 }
3008 
3009 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3010  SelectionDAG &DAG) const {
3011  MachineFunction &MF = DAG.getMachineFunction();
3012  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3013  unsigned ARMPCLabelIndex = 0;
3014  SDLoc DL(Op);
3015  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3016  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3017  SDValue CPAddr;
3018  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3019  if (!IsPositionIndependent) {
3020  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
3021  } else {
3022  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3023  ARMPCLabelIndex = AFI->createPICLabelUId();
3024  ARMConstantPoolValue *CPV =
3025  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3026  ARMCP::CPBlockAddress, PCAdj);
3027  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3028  }
3029  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3030  SDValue Result = DAG.getLoad(
3031  PtrVT, DL, DAG.getEntryNode(), CPAddr,
3032  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3033  if (!IsPositionIndependent)
3034  return Result;
3035  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3036  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3037 }
3038 
3039 /// Convert a TLS address reference into the correct sequence of loads
3040 /// and calls to compute the variable's address for Darwin, and return an
3041 /// SDValue containing the final node.
3042 
3043 /// Darwin only has one TLS scheme which must be capable of dealing with the
3044 /// fully general situation, in the worst case. This means:
3045 /// + "extern __thread" declaration.
3046 /// + Defined in a possibly unknown dynamic library.
3047 ///
3048 /// The general system is that each __thread variable has a [3 x i32] descriptor
3049 /// which contains information used by the runtime to calculate the address. The
3050 /// only part of this the compiler needs to know about is the first word, which
3051 /// contains a function pointer that must be called with the address of the
3052 /// entire descriptor in "r0".
3053 ///
3054 /// Since this descriptor may be in a different unit, in general access must
3055 /// proceed along the usual ARM rules. A common sequence to produce is:
3056 ///
3057 /// movw rT1, :lower16:_var$non_lazy_ptr
3058 /// movt rT1, :upper16:_var$non_lazy_ptr
3059 /// ldr r0, [rT1]
3060 /// ldr rT2, [r0]
3061 /// blx rT2
3062 /// [...address now in r0...]
3063 SDValue
3064 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3065  SelectionDAG &DAG) const {
3066  assert(Subtarget->isTargetDarwin() &&
3067  "This function expects a Darwin target");
3068  SDLoc DL(Op);
3069 
3070  // First step is to get the address of the actual global symbol. This is where
3071  // the TLS descriptor lives.
3072  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3073 
3074  // The first entry in the descriptor is a function pointer that we must call
3075  // to obtain the address of the variable.
3076  SDValue Chain = DAG.getEntryNode();
3077  SDValue FuncTLVGet = DAG.getLoad(
3078  MVT::i32, DL, Chain, DescAddr,
3079  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
3080  /* Alignment = */ 4,
3081  MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
3082  MachineMemOperand::MOInvariant);
3083  Chain = FuncTLVGet.getValue(1);
3084 
3085  MachineFunction &F = DAG.getMachineFunction();
3086  MachineFrameInfo &MFI = F.getFrameInfo();
3087  MFI.setAdjustsStack(true);
3088 
3089  // TLS calls preserve all registers except those that absolutely must be
3090  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3091  // silly).
3092  auto TRI =
3093  getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3094  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3095  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3096 
3097  // Finally, we can make the call. This is just a degenerate version of a
3098  // normal ARM call node: r0 takes the address of the descriptor, and
3099  // returns the address of the variable in this thread.
3100  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3101  Chain =
3102  DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3103  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3104  DAG.getRegisterMask(Mask), Chain.getValue(1));
3105  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3106 }
3107 
3108 SDValue
3109 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3110  SelectionDAG &DAG) const {
3111  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3112 
3113  SDValue Chain = DAG.getEntryNode();
3114  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3115  SDLoc DL(Op);
3116 
3117  // Load the current TEB (thread environment block)
3118  SDValue Ops[] = {Chain,
3119  DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3120  DAG.getConstant(15, DL, MVT::i32),
3121  DAG.getConstant(0, DL, MVT::i32),
3122  DAG.getConstant(13, DL, MVT::i32),
3123  DAG.getConstant(0, DL, MVT::i32),
3124  DAG.getConstant(2, DL, MVT::i32)};
3125  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3126  DAG.getVTList(MVT::i32, MVT::Other), Ops);
3127 
3128  SDValue TEB = CurrentTEB.getValue(0);
3129  Chain = CurrentTEB.getValue(1);
3130 
3131  // Load the ThreadLocalStoragePointer from the TEB
3132  // A pointer to the TLS array is located at offset 0x2c from the TEB.
3133  SDValue TLSArray =
3134  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3135  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3136 
3137  // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3138  // offset into the TLSArray.
3139 
3140  // Load the TLS index from the C runtime
3141  SDValue TLSIndex =
3142  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3143  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3144  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3145 
3146  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3147  DAG.getConstant(2, DL, MVT::i32));
3148  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3149  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3150  MachinePointerInfo());
3151 
3152  // Get the offset of the start of the .tls section (section base)
3153  const auto *GA = cast<GlobalAddressSDNode>(Op);
3154  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3155  SDValue Offset = DAG.getLoad(
3156  PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3157  DAG.getTargetConstantPool(CPV, PtrVT, 4)),
3158  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3159 
3160  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3161 }
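// For illustration, the address computed above is roughly:
//
//   TEB      = mrc p15, #0, rT, c13, c0, #2   (TPIDRURW)
//   TlsArray = *(TEB + 0x2c)
//   TlsBase  = *(TlsArray + _tls_index * 4)
//   addr     = TlsBase + SECREL32(var)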
3162 
3163 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
3164 SDValue
3165 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3166  SelectionDAG &DAG) const {
3167  SDLoc dl(GA);
3168  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3169  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3170  MachineFunction &MF = DAG.getMachineFunction();
3171  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3172  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3173  ARMConstantPoolValue *CPV =
3174  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3175  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3176  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3177  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3178  Argument = DAG.getLoad(
3179  PtrVT, dl, DAG.getEntryNode(), Argument,
3180  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3181  SDValue Chain = Argument.getValue(1);
3182 
3183  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3184  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3185 
3186  // call __tls_get_addr.
3187  ArgListTy Args;
3188  ArgListEntry Entry;
3189  Entry.Node = Argument;
3190  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3191  Args.push_back(Entry);
3192 
3193  // FIXME: is there useful debug info available here?
3194  TargetLowering::CallLoweringInfo CLI(DAG);
3195  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3196  CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3197  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3198 
3199  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3200  return CallResult.first;
3201 }
3202 
3203 // Lower ISD::GlobalTLSAddress using the "initial exec" or
3204 // "local exec" model.
3205 SDValue
3206 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3207  SelectionDAG &DAG,
3208  TLSModel::Model model) const {
3209  const GlobalValue *GV = GA->getGlobal();
3210  SDLoc dl(GA);
3211  SDValue Offset;
3212  SDValue Chain = DAG.getEntryNode();
3213  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3214  // Get the Thread Pointer
3215  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3216 
3217  if (model == TLSModel::InitialExec) {
3218  MachineFunction &MF = DAG.getMachineFunction();
3219  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3220  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3221  // Initial exec model.
3222  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3223  ARMConstantPoolValue *CPV =
3224  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3225  ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3226  true);
3227  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3228  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3229  Offset = DAG.getLoad(
3230  PtrVT, dl, Chain, Offset,
3231  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3232  Chain = Offset.getValue(1);
3233 
3234  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3235  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3236 
3237  Offset = DAG.getLoad(
3238  PtrVT, dl, Chain, Offset,
3239  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3240  } else {
3241  // local exec model
3242  assert(model == TLSModel::LocalExec);
3243  ARMConstantPoolValue *CPV =
3244  ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3245  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3246  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3247  Offset = DAG.getLoad(
3248  PtrVT, dl, Chain, Offset,
3249  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3250  }
3251 
3252  // The address of the thread local variable is the add of the thread
3253  // pointer with the offset of the variable.
3254  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3255 }
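// For illustration (roughly): for "initial exec" the variable's offset is
// loaded through a GOTTPOFF constant-pool entry and then loaded again through
// the GOT, while "local exec" loads a TPOFF offset directly; both then compute
//
//   addr = thread pointer (ARMISD::THREAD_POINTER, i.e. TPIDRURO) + Offset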
3256 
3257 SDValue
3258 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3259  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3260  if (DAG.getTarget().useEmulatedTLS())
3261  return LowerToTLSEmulatedModel(GA, DAG);
3262 
3263  if (Subtarget->isTargetDarwin())
3264  return LowerGlobalTLSAddressDarwin(Op, DAG);
3265 
3266  if (Subtarget->isTargetWindows())
3267  return LowerGlobalTLSAddressWindows(Op, DAG);
3268 
3269  // TODO: implement the "local dynamic" model
3270  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3271  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3272 
3273  switch (model) {
3274  case TLSModel::GeneralDynamic:
3275  case TLSModel::LocalDynamic:
3276  return LowerToTLSGeneralDynamicModel(GA, DAG);
3277  case TLSModel::InitialExec:
3278  case TLSModel::LocalExec:
3279  return LowerToTLSExecModels(GA, DAG, model);
3280  }
3281  llvm_unreachable("bogus TLS model");
3282 }
3283 
3284 /// Return true if all users of V are within function F, looking through
3285 /// ConstantExprs.
3286 static bool allUsersAreInFunction(const Value *V, const Function *F) {
3287  SmallVector<const User*,4> Worklist;
3288  for (auto *U : V->users())
3289  Worklist.push_back(U);
3290  while (!Worklist.empty()) {
3291  auto *U = Worklist.pop_back_val();
3292  if (isa<ConstantExpr>(U)) {
3293  for (auto *UU : U->users())
3294  Worklist.push_back(UU);
3295  continue;
3296  }
3297 
3298  auto *I = dyn_cast<Instruction>(U);
3299  if (!I || I->getParent()->getParent() != F)
3300  return false;
3301  }
3302  return true;
3303 }
3304 
3305 static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3306  const GlobalValue *GV, SelectionDAG &DAG,
3307  EVT PtrVT, const SDLoc &dl) {
3308  // If we're creating a pool entry for a constant global with unnamed address,
3309  // and the global is small enough, we can emit it inline into the constant pool
3310  // to save ourselves an indirection.
3311  //
3312  // This is a win if the constant is only used in one function (so it doesn't
3313  // need to be duplicated) or duplicating the constant wouldn't increase code
3314  // size (implying the constant is no larger than 4 bytes).
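// Illustrative example (hypothetical source, not from this file): a small
// internal constant such as
//   static const char Tag[4] = "ok!";
// marked unnamed_addr can be emitted directly as a literal-pool entry, so
// loading its address becomes a single PC-relative load of the pool slot
// rather than a load of the address followed by a second indirection.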
3315  const Function &F = DAG.getMachineFunction().getFunction();
3316 
3317  // We rely on this decision to inline being idempotent and unrelated to the
3318  // use-site. We know that if we inline a variable at one use site, we'll
3319  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3320  // doesn't know about this optimization, so bail out if it's enabled;
3321  // otherwise we could decide to inline here (and thus never emit the GV) but
3322  // still require the GV from fast-isel generated code.
3323  if (!EnableConstpoolPromotion ||
3324  DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3325  return SDValue();
3326 
3327  auto *GVar = dyn_cast<GlobalVariable>(GV);
3328  if (!GVar || !GVar->hasInitializer() ||
3329  !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3330  !GVar->hasLocalLinkage())
3331  return SDValue();
3332 
3333  // If we inline a value that contains relocations, we move the relocations
3334  // from .data to .text. This is not allowed in position-independent code.
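// (Illustrative: an initializer such as 'static int *P = &SomeGlobal;' needs
// a relocation for &SomeGlobal, so it is not promoted under PIC or ROPI.)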
3335  auto *Init = GVar->getInitializer();
3336  if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3337  Init->needsRelocation())
3338  return SDValue();
3339 
3340  // The constant islands pass can only really deal with alignment requests
3341  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3342  // any type requiring alignment greater than 4 bytes. We also can only
3343  // promote constants that are multiples of 4 bytes in size, or that can be
3344  // padded to a multiple of 4. Currently we only try to pad constants that
3345  // are strings, for simplicity.
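// Worked example (illustrative): a 6-byte string initializer gives
//   Size = 6, RequiredPadding = 4 - (6 % 4) = 2, PaddedSize = 8,
// which is promotable provided Align <= 4 and Size is within the promotion
// size limit. A 7-byte non-string initializer would be rejected, since only
// string initializers are currently padded.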
3346  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3347  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3348  unsigned Align = DAG.getDataLayout().getPreferredAlignment(GVar);
3349  unsigned RequiredPadding = 4 - (Size % 4);
3350  bool PaddingPossible =
3351  RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3352  if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3353  Size == 0)
3354  return SDValue();
3355 
3356  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3357  MachineFunction &MF = DAG.getMachineFunction();
3358  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3359 
3360  // We can't bloat the constant pool too much, else the ConstantIslands pass
3361  // may fail to converge. If we haven't promoted this global yet (it may have
3362  // multiple uses), and promoting it would increase the constant pool size
3363  // (Size > 4), ensure we have space to do so up to ConstpoolPromotionMaxTotal.
3364  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3365  if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3366  ConstpoolPromotionMaxTotal)
3367  return SDValue();
3368 
3369  // This is only valid if all users are in a single function; we can't clone
3370  // the constant in general. The LLVM IR unnamed_addr allows merging
3371  // constants, but not cloning them.
3372  //
3373  // We could potentially allow cloning if we could prove all uses of the
3374  // constant in the current function don't care about the address, like
3375  // printf format strings. But that isn't implemented yet.
3376  if (!allUsersAreInFunction(GVar, &F))
3377  return SDValue();
3378 
3379  // We're going to inline this global. Pad it out if needed.
3380  if (RequiredPadding != 4) {
3381  StringRef S = CDAInit->getAsString();
3382 
3383  SmallVector<uint8_t,16> V(S.size());
3384  std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3385  while (RequiredPadding--)
3386  V.push_back(0);
3387  Init = ConstantDataArray::get(*DAG.getContext(), V);
3388  }
3389 
3390  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3391  SDValue CPAddr =
3392  DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3393  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3394  AFI->markGlobalAsPromotedToConstantPool(GVar);
3395  AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3396  PaddedSize - 4);
3397  }
3398  ++NumConstpoolPromoted;
3399  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3400 }
3401 
3402 static bool isReadOnly(const GlobalValue *GV) {
3403  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3404  if (!(GV = GA->getBaseObject()))
3405  return false;
3406  if (const auto *V = dyn_cast<GlobalVariable>(GV))
3407  return V->isConstant();
3408  return isa<Function>(GV);
3409 }
3410 
3411 SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3412  SelectionDAG &DAG) const {
3413  switch (Subtarget->getTargetTriple().getObjectFormat()) {
3414  default: llvm_unreachable("unknown object format");
3415  case Triple::COFF:
3416  return LowerGlobalAddressWindows(Op, DAG);
3417  case Triple::ELF:
3418  return LowerGlobalAddressELF(Op, DAG);
3419  case Triple::MachO:
3420  return LowerGlobalAddressDarwin(Op, DAG);
3421  }
3422 }
3423 
3424 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3425  SelectionDAG &DAG) const {
3426  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3427  SDLoc dl(Op);
3428  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3429  const TargetMachine &TM = getTargetMachine();
3430  bool IsRO = isReadOnly(GV);
3431 
3432  // promoteToConstantPool only if not generating XO text section
3433  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3434  if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3435  return V;
3436 
3437  if (isPositionIndependent()) {
3438  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3439  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3440  UseGOT_PREL ? ARMII::MO_GOT : 0);
3441  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3442  if (UseGOT_PREL)
3443  Result =
3444  DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3445  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3446  return Result;
3447  } else if (Subtarget->isROPI() && IsRO) {
3448  // PC-relative.
3449  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3450  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3451  return Result;
3452  } else if (Subtarget->isRWPI() && !IsRO) {
3453  // SB-relative.
3454  SDValue RelAddr;
3455  if (Subtarget->useMovt()) {
3456  ++NumMovwMovt;
3457  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3458  RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3459  } else { // use literal pool for address constant
3460  ARMConstantPoolValue *CPV =
3461  ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3462  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3463  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3464  RelAddr = DAG.getLoad(
3465  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3466  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3467  }
3468  SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3469  SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3470  return Result;
3471  }
3472 
3473  // If we have T2 ops, we can materialize the address directly via a
3474  // movt/movw pair. This is always cheaper.
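// e.g. (illustrative, non-PIC; register choice is hypothetical):
//   movw r0, :lower16:sym
//   movt r0, :upper16:sym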
3475  if (Subtarget->useMovt()) {
3476  ++NumMovwMovt;
3477  // FIXME: Once remat is capable of dealing with instructions with register
3478  // operands, expand this into two nodes.
3479  return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3480  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3481  } else {
3482  SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3483  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3484  return DAG.getLoad(
3485  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3486  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3487  }
3488 }
3489 
3490 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3491  SelectionDAG &DAG) const {
3492  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3493  "ROPI/RWPI not currently supported for Darwin");
3494  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3495  SDLoc dl(Op);
3496  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3497 
3498  if (Subtarget->useMovt())
3499  ++NumMovwMovt;
3500 
3501  // FIXME: Once remat is capable of dealing with instructions with register
3502  // operands, expand this into multiple nodes
3503  unsigned Wrapper =
3504  isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3505 
3506  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3507  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3508 
3509  if (Subtarget->isGVIndirectSymbol(GV))
3510  Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3511  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3512  return Result;
3513 }
3514 
3515 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3516  SelectionDAG &DAG) const {
3517  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3518  assert(Subtarget->useMovt() &&
3519  "Windows on ARM expects to use movw/movt");
3520  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3521  "ROPI/RWPI not currently supported for Windows");
3522 
3523  const TargetMachine &TM = getTargetMachine();
3524  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3525  ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
3526  if (GV->hasDLLImportStorageClass())
3527  TargetFlags = ARMII::MO_DLLIMPORT;
3528  else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
3529  TargetFlags = ARMII::MO_COFFSTUB;
3530  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3531  SDValue Result;
3532  SDLoc DL(Op);
3533 
3534  ++NumMovwMovt;
3535 
3536  // FIXME: Once remat is capable of dealing with instructions with register
3537  // operands, expand this into two nodes.
3538  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3539  DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
3540  TargetFlags));
3541  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
3542  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3543  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3544  return Result;
3545 }
3546 
3547 SDValue
3548 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3549  SDLoc dl(Op);
3550  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3551  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3552  DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3553  Op.getOperand(1), Val);
3554 }
3555 
3556 SDValue
3557 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3558  SDLoc dl(Op);
3559  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3560  Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3561 }
3562 
3563 SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3564  SelectionDAG &DAG) const {
3565  SDLoc dl(Op);
3566  return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3567  Op.getOperand(0));
3568 }
3569 
3570 SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
3571  SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
3572  unsigned IntNo =
3573  cast<ConstantSDNode>(
3574  Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other))
3575  ->getZExtValue();
3576  switch (IntNo) {
3577  default:
3578  return SDValue(); // Don't custom lower most intrinsics.
3579  case Intrinsic::arm_gnu_eabi_mcount: {
3580  MachineFunction &MF = DAG.getMachineFunction();
3581  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3582  SDLoc dl(Op);
3583  SDValue Chain = Op.getOperand(0);
3584  // call "\01__gnu_mcount_nc"
3585  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
3586  const uint32_t *Mask =
3587  ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3588  assert(Mask && "Missing call preserved mask for calling convention");
3589  // Mark LR as an implicit live-in.
3590  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
3591  SDValue ReturnAddress =
3592  DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
3593  std::vector<EVT> ResultTys = {MVT::Other, MVT::Glue};
3594  SDValue Callee =
3595  DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
3596  SDValue RegisterMask = DAG.getRegisterMask(Mask);
3597  if (Subtarget->isThumb())
3598  return SDValue(
3599  DAG.getMachineNode(
3600  ARM::tBL_PUSHLR, dl, ResultTys,
3601  {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
3602  DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
3603  0);
3604  return SDValue(
3605  DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
3606  {ReturnAddress, Callee, RegisterMask, Chain}),
3607  0);
3608  }
3609  }
3610 }
3611 
3612 SDValue
3613 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3614  const ARMSubtarget *Subtarget) const {
3615  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3616  SDLoc dl(Op);
3617  switch (IntNo) {
3618  default: return SDValue(); // Don't custom lower most intrinsics.
3619  case Intrinsic::thread_pointer: {
3620  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3621  return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3622  }
3623  case Intrinsic::eh_sjlj_lsda: {
3624  MachineFunction &MF = DAG.getMachineFunction();
3625  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3626  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3627  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3628  SDValue CPAddr;
3629  bool IsPositionIndependent = isPositionIndependent();
3630  unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3631  ARMConstantPoolValue *CPV =
3632  ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
3633  ARMCP::CPLSDA, PCAdj);
3634  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3635  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3636  SDValue Result = DAG.getLoad(
3637  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3638  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3639 
3640  if (IsPositionIndependent) {
3641  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3642  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3643  }
3644  return Result;
3645  }
3646  case Intrinsic::arm_neon_vabs:
3647  return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3648  Op.getOperand(1));
3649  case Intrinsic::arm_neon_vmulls:
3650  case Intrinsic::arm_neon_vmullu: {
3651  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3652  ? ARMISD::VMULLs : ARMISD::VMULLu;
3653  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3654  Op.getOperand(1), Op.getOperand(2));
3655  }
3656  case Intrinsic::arm_neon_vminnm:
3657  case Intrinsic::arm_neon_vmaxnm: {
3658  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3659  ? ISD::FMINNUM : ISD::FMAXNUM;
3660  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3661  Op.getOperand(1), Op.getOperand(2));
3662  }
3663  case Intrinsic::arm_neon_vminu:
3664  case Intrinsic::arm_neon_vmaxu: {
3665  if (Op.getValueType().isFloatingPoint())
3666  return SDValue();
3667  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3668  ? ISD::UMIN : ISD::UMAX;
3669  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3670  Op.getOperand(1), Op.getOperand(2));
3671  }
3672  case Intrinsic::arm_neon_vmins:
3673  case Intrinsic::arm_neon_vmaxs: {
3674  // v{min,max}s is overloaded between signed integers and floats.
3675  if (!Op.getValueType().isFloatingPoint()) {
3676  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3677  ? ISD::SMIN : ISD::SMAX;
3678  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3679  Op.getOperand(1), Op.getOperand(2));
3680  }
3681  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3682  ? ISD::FMINIMUM : ISD::FMAXIMUM;
3683  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3684  Op.getOperand(1), Op.getOperand(2));
3685  }
3686  case Intrinsic::arm_neon_vtbl1:
3687  return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3688  Op.getOperand(1), Op.getOperand(2));
3689  case Intrinsic::arm_neon_vtbl2:
3690  return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3691  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3692  }
3693 }
3694 
3695 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3696  const ARMSubtarget *Subtarget) {
3697  SDLoc dl(Op);
3698  ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
3699  auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
3700  if (SSID == SyncScope::SingleThread)
3701  return Op;
3702 
3703  if (!Subtarget->hasDataBarrier()) {
3704  // Some ARMv6 CPUs can support data barriers with an MCR instruction.
3705  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3706  // here.
3707  assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3708  "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3709  return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3710  DAG.getConstant(0, dl, MVT::i32));
3711  }
3712 
3713  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3714  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3715  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3716  if (Subtarget->isMClass()) {
3717  // Only a full system barrier exists in the M-class architectures.
3718  Domain = ARM_MB::SY;
3719  } else if (Subtarget->preferISHSTBarriers() &&
3720  Ord == AtomicOrdering::Release) {
3721  // Swift happens to implement ISHST barriers in a way that's compatible with
3722  // Release semantics but weaker than ISH so we'd be fools not to use
3723  // it. Beware: other processors probably don't!
3724  Domain = ARM_MB::ISHST;
3725  }
3726 
3727  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3728  DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3729  DAG.getConstant(Domain, dl, MVT::i32));
3730 }
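// In effect (illustrative): on A/R-class cores the fence becomes "dmb ish"
// (or "dmb ishst" for release-only fences on subtargets that prefer it),
// while M-class cores only get the full-system "dmb sy" form.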
3731 
3732 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3733  const ARMSubtarget *Subtarget) {
3734  // ARM prior to v5TE and Thumb1 do not have preload instructions.
3735  if (!(Subtarget->isThumb2() ||
3736  (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3737  // Just preserve the chain.
3738  return Op.getOperand(0);
3739 
3740  SDLoc dl(Op);
3741  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3742  if (!isRead &&
3743  (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3744  // ARMv7 with MP extension has PLDW.
3745  return Op.getOperand(0);
3746 
3747  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3748  if (Subtarget->isThumb()) {
3749  // Invert the bits.
3750  isRead = ~isRead & 1;
3751  isData = ~isData & 1;
3752  }
3753 
3754  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3755  Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3756  DAG.getConstant(isData, dl, MVT::i32));
3757 }
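// (Illustrative mapping: the isRead/isData operands of ARMISD::PRELOAD select
// between PLD, PLDW (write hint, ARMv7 with the MP extension) and PLI
// (instruction prefetch); on Thumb the bits are inverted above, in the form
// the Thumb patterns expect.)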
3758 
3759 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3760  MachineFunction &MF = DAG.getMachineFunction();
3761  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3762 
3763  // vastart just stores the address of the VarArgsFrameIndex slot into the
3764  // memory location argument.
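// For example (illustrative): for 'void f(int n, ...)', va_start(ap, n)
// reaches here as ISD::VASTART and becomes a single store of the
// VarArgsFrameIndex frame address (recorded by VarArgStyleRegisters below)
// into 'ap'.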
3765  SDLoc dl(Op);
3766  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3767  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3768  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3769  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3770  MachinePointerInfo(SV));
3771 }
3772 
3773 SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3774  CCValAssign &NextVA,
3775  SDValue &Root,
3776  SelectionDAG &DAG,
3777  const SDLoc &dl) const {
3778  MachineFunction &MF = DAG.getMachineFunction();
3779  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3780 
3781  const TargetRegisterClass *RC;
3782  if (AFI->isThumb1OnlyFunction())
3783  RC = &ARM::tGPRRegClass;
3784  else
3785  RC = &ARM::GPRRegClass;
3786 
3787  // Transform the arguments stored in physical registers into virtual ones.
3788  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3789  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3790 
3791  SDValue ArgValue2;
3792  if (NextVA.isMemLoc()) {
3793  MachineFrameInfo &MFI = MF.getFrameInfo();
3794  int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3795 
3796  // Create load node to retrieve arguments from the stack.
3797  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3798  ArgValue2 = DAG.getLoad(
3799  MVT::i32, dl, Root, FIN,
3800  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3801  } else {
3802  Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3803  ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3804  }
3805  if (!Subtarget->isLittle())
3806  std::swap (ArgValue, ArgValue2);
3807  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3808 }
3809 
3810 // The remaining GPRs hold either the beginning of variable-argument
3811 // data, or the beginning of an aggregate passed by value (usually
3812 // byval). Either way, we allocate stack slots adjacent to the data
3813 // provided by our caller, and store the unallocated registers there.
3814 // If this is a variadic function, the va_list pointer will begin with
3815 // these values; otherwise, this reassembles a (byval) structure that
3816 // was split between registers and memory.
3817 // Return: the frame index that the registers were stored into.
3818 int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3819  const SDLoc &dl, SDValue &Chain,
3820  const Value *OrigArg,
3821  unsigned InRegsParamRecordIdx,
3822  int ArgOffset, unsigned ArgSize) const {
3823  // Currently, two use-cases are possible:
3824  // Case #1. Non-var-args function, and we meet the first byval parameter.
3825  // Set up the first unallocated register as the first byval register and
3826  // eat all remaining registers
3827  // (these two actions are performed by the HandleByVal method).
3828  // Then, here, we initialize the stack frame with
3829  // "store-reg" instructions.
3830  // Case #2. Var-args function that doesn't contain byval parameters.
3831  // The same: eat all remaining unallocated registers and
3832  // initialize the stack frame.
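// Illustrative Case #1 (hypothetical signature):
//   void f(int a, struct S s);   // S is 12 bytes, passed byval
// 'a' takes r0 and HandleByVal assigns r1-r3 to 's'; here we create a
// 12-byte fixed stack object adjacent to the caller-provided area and store
// r1-r3 into it, so the aggregate is contiguous in memory again.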
3833 
3834  MachineFunction &MF = DAG.getMachineFunction();
3835  MachineFrameInfo &MFI = MF.getFrameInfo();
3836  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3837  unsigned RBegin, REnd;
3838  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3839  CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3840  } else {
3841  unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3842  RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3843  REnd = ARM::R4;
3844  }
3845 
3846  if (REnd != RBegin)
3847  ArgOffset = -4 * (ARM::R4 - RBegin);
3848 
3849  auto PtrVT = getPointerTy(DAG.getDataLayout());
3850  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3851  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3852 
3853  SmallVector<SDValue, 4> MemOps;
3854  const TargetRegisterClass *RC =
3855  AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3856 
3857  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3858  unsigned VReg = MF.addLiveIn(Reg, RC);
3859  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3860  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3861  MachinePointerInfo(OrigArg, 4 * i));
3862  MemOps.push_back(Store);
3863  FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3864  }
3865 
3866  if (!MemOps.empty())
3867  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3868  return FrameIndex;
3869 }
3870 
3871 // Set up the stack frame that the va_list pointer will start from.
3872 void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3873  const SDLoc &dl, SDValue &Chain,
3874  unsigned ArgOffset,
3875  unsigned TotalArgRegsSaveSize,
3876  bool ForceMutable) const {
3877  MachineFunction &MF = DAG.getMachineFunction();
3878  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3879 
3880  // Try to store any remaining integer argument regs
3881  // to their spots on the stack so that they may be loaded by dereferencing
3882  // the result of va_next.
3883  // If there are no regs to be stored, just point the address past the
3884  // last argument passed via the stack.
3885  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3886  CCInfo.getInRegsParamsCount(),
3887  CCInfo.getNextStackOffset(),
3888  std::max(4U, TotalArgRegsSaveSize));
3889  AFI->setVarArgsFrameIndex(FrameIndex);
3890 }
3891 
3892 SDValue ARMTargetLowering::LowerFormalArguments(
3893  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3894  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3895  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3896  MachineFunction &MF = DAG.getMachineFunction();
3897  MachineFrameInfo &MFI = MF.getFrameInfo();
3898 
3899  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3900 
3901  // Assign locations to all of the incoming arguments.
3902  SmallVector<CCValAssign, 16> ArgLocs;
3903  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3904  *DAG.getContext());
3905  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3906 
3907  SmallVector<SDValue, 16> ArgValues;
3908  SDValue ArgValue;
3909  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3910  unsigned CurArgIdx = 0;
3911 
3912  // Initially ArgRegsSaveSize is zero.
3913  // Then we increase this value each time we meet a byval parameter.
3914  // We also increase this value in the case of a varargs function.
3915  AFI->setArgRegsSaveSize(0);
3916 
3917  // Calculate the amount of stack space that we need to allocate to store
3918  // byval and variadic arguments that are passed in registers.
3919  // We need to know this before we allocate the first byval or variadic
3920  // argument, as they will be allocated a stack slot below the CFA (Canonical
3921  // Frame Address, the stack pointer at entry to the function).
3922  unsigned ArgRegBegin = ARM::R4;
3923  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3924  if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3925  break;
3926 
3927  CCValAssign &VA = ArgLocs[i];
3928  unsigned Index = VA.getValNo();
3929  ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3930  if (!Flags.isByVal())
3931  continue;
3932 
3933  assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3934  unsigned RBegin, REnd;
3935  CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3936  ArgRegBegin = std::min(ArgRegBegin, RBegin);
3937 
3938  CCInfo.nextInRegsParam();
3939  }
3940  CCInfo.rewindByValRegsInfo();
3941 
3942  int lastInsIndex = -1;
3943  if (isVarArg && MFI.hasVAStart()) {
3944  unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3945  if (RegIdx != array_lengthof(GPRArgRegs))
3946  ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3947  }
3948 
3949  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3950  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3951  auto PtrVT = getPointerTy(DAG.getDataLayout());
3952 
3953  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3954  CCValAssign &VA = ArgLocs[i];
3955  if (Ins[VA.getValNo()].isOrigArg()) {
3956  std::advance(CurOrigArg,
3957  Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3958  CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3959  }
3960  // Arguments stored in registers.
3961  if (VA.isRegLoc()) {
3962  EVT RegVT = VA.getLocVT();
3963 
3964  if (VA.needsCustom()) {
3965  // f64 and vector types are split up into multiple registers or
3966  // combinations of registers and stack slots.
3967  if (VA.getLocVT() == MVT::v2f64) {
3968  SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3969  Chain, DAG, dl);
3970  VA = ArgLocs[++i]; // skip ahead to next loc
3971  SDValue ArgValue2;
3972  if (VA.isMemLoc()) {
3973  int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3974  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3975  ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3976  MachinePointerInfo::getFixedStack(
3977  DAG.getMachineFunction(), FI));
3978  } else {
3979  ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3980  Chain, DAG, dl);
3981  }
3982  ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3983  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3984  ArgValue, ArgValue1,
3985  DAG.getIntPtrConstant(0, dl));
3986  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3987  ArgValue, ArgValue2,
3988  DAG.getIntPtrConstant(1, dl));
3989  } else
3990  ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3991  } else {
3992  const TargetRegisterClass *RC;
3993 
3994 
3995  if (RegVT == MVT::f16)
3996  RC = &ARM::HPRRegClass;
3997  else if (RegVT == MVT::f32)
3998  RC = &ARM::SPRRegClass;
3999  else if (RegVT == MVT::f64 || RegVT == MVT::v4f16)
4000  RC = &ARM::DPRRegClass;
4001  else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16)
4002  RC = &ARM::QPRRegClass;
4003  else if (RegVT == MVT::i32)
4004  RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4005  : &ARM::GPRRegClass;
4006  else
4007  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4008 
4009  // Transform the arguments in physical registers into virtual ones.
4010  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
4011  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4012 
4013  // If this value is passed in r0 and has the returned attribute (e.g.
4014  // C++ 'structors), record this fact for later use.
4015  if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4016  AFI->setPreservesR0();
4017  }
4018  }
4019 
4020  // If this is an 8 or 16-bit value, it is really passed promoted
4021  // to 32 bits. Insert an assert[sz]ext to capture this, then
4022  // truncate to the right size.
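// e.g. (illustrative): an i8 argument sign-extended by the caller arrives in
// a full 32-bit register; AssertSext records that the upper 24 bits are sign
// bits, so the following TRUNCATE back to i8 is free.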
4023  switch (VA.getLocInfo()) {
4024  default: llvm_unreachable("Unknown loc info!");
4025  case CCValAssign::Full: break;
4026  case CCValAssign::BCvt:
4027  ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4028  break;
4029  case CCValAssign::SExt:
4030  ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
4031  DAG.getValueType(VA.getValVT()));
4032  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4033  break;
4034  case CCValAssign::ZExt:
4035  ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
4036  DAG.getValueType(VA.getValVT()));
4037  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);