ARMISelLowering.cpp
1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that ARM uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMISelLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMCallingConv.h"
18 #include "ARMConstantPoolValue.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMPerfectShuffle.h"
21 #include "ARMRegisterInfo.h"
22 #include "ARMSelectionDAGInfo.h"
23 #include "ARMSubtarget.h"
26 #include "Utils/ARMBaseInfo.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/BitVector.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/STLExtras.h"
33 #include "llvm/ADT/SmallPtrSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/Statistic.h"
36 #include "llvm/ADT/StringExtras.h"
37 #include "llvm/ADT/StringRef.h"
38 #include "llvm/ADT/StringSwitch.h"
39 #include "llvm/ADT/Triple.h"
40 #include "llvm/ADT/Twine.h"
64 #include "llvm/IR/Attributes.h"
65 #include "llvm/IR/CallingConv.h"
66 #include "llvm/IR/Constant.h"
67 #include "llvm/IR/Constants.h"
68 #include "llvm/IR/DataLayout.h"
69 #include "llvm/IR/DebugLoc.h"
70 #include "llvm/IR/DerivedTypes.h"
71 #include "llvm/IR/Function.h"
72 #include "llvm/IR/GlobalAlias.h"
73 #include "llvm/IR/GlobalValue.h"
74 #include "llvm/IR/GlobalVariable.h"
75 #include "llvm/IR/IRBuilder.h"
76 #include "llvm/IR/InlineAsm.h"
77 #include "llvm/IR/Instruction.h"
78 #include "llvm/IR/Instructions.h"
79 #include "llvm/IR/IntrinsicInst.h"
80 #include "llvm/IR/Intrinsics.h"
81 #include "llvm/IR/Module.h"
82 #include "llvm/IR/PatternMatch.h"
83 #include "llvm/IR/Type.h"
84 #include "llvm/IR/User.h"
85 #include "llvm/IR/Value.h"
86 #include "llvm/MC/MCInstrDesc.h"
88 #include "llvm/MC/MCRegisterInfo.h"
89 #include "llvm/MC/MCSchedule.h"
92 #include "llvm/Support/Casting.h"
93 #include "llvm/Support/CodeGen.h"
95 #include "llvm/Support/Compiler.h"
96 #include "llvm/Support/Debug.h"
98 #include "llvm/Support/KnownBits.h"
100 #include "llvm/Support/MathExtras.h"
104 #include <algorithm>
105 #include <cassert>
106 #include <cstdint>
107 #include <cstdlib>
108 #include <iterator>
109 #include <limits>
110 #include <string>
111 #include <tuple>
112 #include <utility>
113 #include <vector>
114 
115 using namespace llvm;
116 using namespace llvm::PatternMatch;
117 
118 #define DEBUG_TYPE "arm-isel"
119 
120 STATISTIC(NumTailCalls, "Number of tail calls");
121 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
122 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
123 STATISTIC(NumConstpoolPromoted,
124  "Number of constants with their storage promoted into constant pools");
125 
126 static cl::opt<bool>
127 ARMInterworking("arm-interworking", cl::Hidden,
128  cl::desc("Enable / disable ARM interworking (for debugging only)"),
129  cl::init(true));
130 
 130 
 131 static cl::opt<bool> EnableConstpoolPromotion(
 132  "arm-promote-constant", cl::Hidden,
133  cl::desc("Enable / disable promotion of unnamed_addr constants into "
134  "constant pools"),
135  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
 136 static cl::opt<unsigned> ConstpoolPromotionMaxSize(
 137  "arm-promote-constant-max-size", cl::Hidden,
138  cl::desc("Maximum size of constant to promote into a constant pool"),
139  cl::init(64));
 140 static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
 141  "arm-promote-constant-max-total", cl::Hidden,
142  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
143  cl::init(128));
144 
145 // The APCS parameter registers.
146 static const MCPhysReg GPRArgRegs[] = {
147  ARM::R0, ARM::R1, ARM::R2, ARM::R3
148 };
149 
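// addTypeForNEON registers the legalization actions shared by every NEON vector
// type: loads, stores and bitwise ops are promoted to the given wider types,
// lane insert/extract, shuffles and BUILD_VECTOR get custom lowering, and
// operations NEON lacks (divide, remainder, select) are expanded.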
150 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
151  MVT PromotedBitwiseVT) {
152  if (VT != PromotedLdStVT) {
153  setOperationAction(ISD::LOAD, VT, Promote);
154  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
155 
156  setOperationAction(ISD::STORE, VT, Promote);
157  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
158  }
159 
160  MVT ElemTy = VT.getVectorElementType();
161  if (ElemTy != MVT::f64)
162  setOperationAction(ISD::SETCC, VT, Custom);
163  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
164  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
165  if (ElemTy == MVT::i32) {
166  setOperationAction(ISD::SINT_TO_FP, VT, Custom);
167  setOperationAction(ISD::UINT_TO_FP, VT, Custom);
168  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
169  setOperationAction(ISD::FP_TO_UINT, VT, Custom);
170  } else {
171  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
172  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
173  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
174  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
175  }
176  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
177  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
178  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
179  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
180  setOperationAction(ISD::SELECT, VT, Expand);
181  setOperationAction(ISD::SELECT_CC, VT, Expand);
182  setOperationAction(ISD::VSELECT, VT, Expand);
183  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
184  if (VT.isInteger()) {
185  setOperationAction(ISD::SHL, VT, Custom);
186  setOperationAction(ISD::SRA, VT, Custom);
187  setOperationAction(ISD::SRL, VT, Custom);
188  }
189 
190  // Promote all bit-wise operations.
191  if (VT.isInteger() && VT != PromotedBitwiseVT) {
192  setOperationAction(ISD::AND, VT, Promote);
193  AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
194  setOperationAction(ISD::OR, VT, Promote);
195  AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
196  setOperationAction(ISD::XOR, VT, Promote);
197  AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
198  }
199 
200  // Neon does not support vector divide/remainder operations.
201  setOperationAction(ISD::SDIV, VT, Expand);
202  setOperationAction(ISD::UDIV, VT, Expand);
203  setOperationAction(ISD::FDIV, VT, Expand);
204  setOperationAction(ISD::SREM, VT, Expand);
205  setOperationAction(ISD::UREM, VT, Expand);
206  setOperationAction(ISD::FREM, VT, Expand);
207 
208  if (!VT.isFloatingPoint() &&
209  VT != MVT::v2i64 && VT != MVT::v1i64)
210  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
211  setOperationAction(Opcode, VT, Legal);
212 }
213 
214 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
215  addRegisterClass(VT, &ARM::DPRRegClass);
216  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
217 }
218 
219 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
220  addRegisterClass(VT, &ARM::DPairRegClass);
221  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
222 }
223 
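// setAllExpand marks every generic operation on VT as Expand, then re-legalizes
// the handful of operations below that stay cheap even when all arithmetic on
// the type has to be broken up or turned into library calls.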
224 void ARMTargetLowering::setAllExpand(MVT VT) {
225  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
226  setOperationAction(Opc, VT, Expand);
227 
228  // We support these really simple operations even on types where all
229  // the actual arithmetic has to be broken down into simpler
230  // operations or turned into library calls.
231  setOperationAction(ISD::BITCAST, VT, Legal);
232  setOperationAction(ISD::LOAD, VT, Legal);
233  setOperationAction(ISD::STORE, VT, Legal);
234  setOperationAction(ISD::UNDEF, VT, Legal);
235 }
236 
237 void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
238  LegalizeAction Action) {
239  setLoadExtAction(ISD::EXTLOAD, From, To, Action);
240  setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
241  setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
242 }
243 
244 void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
245  const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
246 
247  for (auto VT : IntTypes) {
248  addRegisterClass(VT, &ARM::MQPRRegClass);
249  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
250  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
251  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
252  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
253  setOperationAction(ISD::SHL, VT, Custom);
254  setOperationAction(ISD::SRA, VT, Custom);
255  setOperationAction(ISD::SRL, VT, Custom);
256  setOperationAction(ISD::SMIN, VT, Legal);
257  setOperationAction(ISD::SMAX, VT, Legal);
258  setOperationAction(ISD::UMIN, VT, Legal);
259  setOperationAction(ISD::UMAX, VT, Legal);
260  setOperationAction(ISD::ABS, VT, Legal);
261  setOperationAction(ISD::SETCC, VT, Custom);
262  setOperationAction(ISD::MLOAD, VT, Custom);
263  setOperationAction(ISD::MSTORE, VT, Legal);
264  setOperationAction(ISD::CTLZ, VT, Legal);
265  setOperationAction(ISD::CTTZ, VT, Custom);
266  setOperationAction(ISD::BITREVERSE, VT, Legal);
267  setOperationAction(ISD::BSWAP, VT, Legal);
268  setOperationAction(ISD::SADDSAT, VT, Legal);
269  setOperationAction(ISD::UADDSAT, VT, Legal);
270  setOperationAction(ISD::SSUBSAT, VT, Legal);
271  setOperationAction(ISD::USUBSAT, VT, Legal);
272 
273  // No native support for these.
274  setOperationAction(ISD::UDIV, VT, Expand);
275  setOperationAction(ISD::SDIV, VT, Expand);
276  setOperationAction(ISD::UREM, VT, Expand);
277  setOperationAction(ISD::SREM, VT, Expand);
278  setOperationAction(ISD::CTPOP, VT, Expand);
279 
280  // Vector reductions
281  setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
282  setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
283  setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
284  setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
285  setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
286 
287  if (!HasMVEFP) {
288  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
289  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
290  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
291  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
292  }
293 
294  // Pre and Post inc are supported on loads and stores
 295  for (unsigned im = (unsigned)ISD::PRE_INC;
 296  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
297  setIndexedLoadAction(im, VT, Legal);
298  setIndexedStoreAction(im, VT, Legal);
299  }
300  }
301 
302  const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
303  for (auto VT : FloatTypes) {
304  addRegisterClass(VT, &ARM::MQPRRegClass);
305  if (!HasMVEFP)
306  setAllExpand(VT);
307 
308  // These are legal or custom whether we have MVE.fp or not
309  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
310  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
311  setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
312  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
313  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
314  setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
315  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
316  setOperationAction(ISD::SETCC, VT, Custom);
317  setOperationAction(ISD::MLOAD, VT, Custom);
318  setOperationAction(ISD::MSTORE, VT, Legal);
319 
320  // Pre and Post inc are supported on loads and stores
 321  for (unsigned im = (unsigned)ISD::PRE_INC;
 322  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
323  setIndexedLoadAction(im, VT, Legal);
324  setIndexedStoreAction(im, VT, Legal);
325  }
326 
327  if (HasMVEFP) {
328  setOperationAction(ISD::FMINNUM, VT, Legal);
329  setOperationAction(ISD::FMAXNUM, VT, Legal);
330  setOperationAction(ISD::FROUND, VT, Legal);
331 
332  // No native support for these.
333  setOperationAction(ISD::FDIV, VT, Expand);
334  setOperationAction(ISD::FREM, VT, Expand);
335  setOperationAction(ISD::FSQRT, VT, Expand);
336  setOperationAction(ISD::FSIN, VT, Expand);
337  setOperationAction(ISD::FCOS, VT, Expand);
338  setOperationAction(ISD::FPOW, VT, Expand);
339  setOperationAction(ISD::FLOG, VT, Expand);
340  setOperationAction(ISD::FLOG2, VT, Expand);
341  setOperationAction(ISD::FLOG10, VT, Expand);
342  setOperationAction(ISD::FEXP, VT, Expand);
343  setOperationAction(ISD::FEXP2, VT, Expand);
344  setOperationAction(ISD::FNEARBYINT, VT, Expand);
345  }
346  }
347 
348  // We 'support' these types up to bitcast/load/store level, regardless of
 349  // MVE integer-only / float support. Only FP data processing on the FP
 350  // vector types is inhibited at the integer-only level.
351  const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
352  for (auto VT : LongTypes) {
353  addRegisterClass(VT, &ARM::MQPRRegClass);
354  setAllExpand(VT);
355  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
356  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
357  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
358  }
359  // We can do bitwise operations on v2i64 vectors
360  setOperationAction(ISD::AND, MVT::v2i64, Legal);
361  setOperationAction(ISD::OR, MVT::v2i64, Legal);
362  setOperationAction(ISD::XOR, MVT::v2i64, Legal);
363 
364  // It is legal to extload from v4i8 to v4i16 or v4i32.
365  addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
366  addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
367  addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
368 
369  // Some truncating stores are legal too.
370  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
371  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
372  setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
373 
374  // Pre and Post inc on these are legal, given the correct extends
 375  for (unsigned im = (unsigned)ISD::PRE_INC;
 376  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
377  setIndexedLoadAction(im, MVT::v8i8, Legal);
378  setIndexedStoreAction(im, MVT::v8i8, Legal);
379  setIndexedLoadAction(im, MVT::v4i8, Legal);
380  setIndexedStoreAction(im, MVT::v4i8, Legal);
381  setIndexedLoadAction(im, MVT::v4i16, Legal);
382  setIndexedStoreAction(im, MVT::v4i16, Legal);
383  }
384 
385  // Predicate types
386  const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1};
387  for (auto VT : pTypes) {
388  addRegisterClass(VT, &ARM::VCCRRegClass);
389  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
390  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
391  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
392  setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
393  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
394  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
395  setOperationAction(ISD::SETCC, VT, Custom);
396  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
397  setOperationAction(ISD::LOAD, VT, Custom);
398  setOperationAction(ISD::STORE, VT, Custom);
399  }
400 }
401 
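// The constructor below selects libcall names and calling conventions for the
// target ABI, then establishes per-type operation actions for the available
// register classes and instruction set extensions.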
 402 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
 403  const ARMSubtarget &STI)
404  : TargetLowering(TM), Subtarget(&STI) {
405  RegInfo = Subtarget->getRegisterInfo();
406  Itins = Subtarget->getInstrItineraryData();
407 
410 
411  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
412  !Subtarget->isTargetWatchOS()) {
413  bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
414  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
415  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
 416  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
 417  : CallingConv::ARM_AAPCS);
418  }
419 
420  if (Subtarget->isTargetMachO()) {
421  // Uses VFP for Thumb libfuncs if available.
422  if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
423  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
424  static const struct {
425  const RTLIB::Libcall Op;
426  const char * const Name;
427  const ISD::CondCode Cond;
428  } LibraryCalls[] = {
429  // Single-precision floating-point arithmetic.
430  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
431  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
432  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
433  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
434 
435  // Double-precision floating-point arithmetic.
436  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
437  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
438  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
439  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
440 
441  // Single-precision comparisons.
442  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
443  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
444  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
445  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
446  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
447  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
448  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
449  { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
450 
451  // Double-precision comparisons.
452  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
453  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
454  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
455  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
456  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
457  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
458  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
459  { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
460 
461  // Floating-point to integer conversions.
462  // i64 conversions are done via library routines even when generating VFP
463  // instructions, so use the same ones.
464  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
465  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
466  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
467  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
468 
469  // Conversions between floating types.
470  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
471  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
472 
473  // Integer to floating-point conversions.
474  // i64 conversions are done via library routines even when generating VFP
475  // instructions, so use the same ones.
476  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
477  // e.g., __floatunsidf vs. __floatunssidfvfp.
478  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
479  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
480  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
481  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
482  };
483 
484  for (const auto &LC : LibraryCalls) {
485  setLibcallName(LC.Op, LC.Name);
486  if (LC.Cond != ISD::SETCC_INVALID)
487  setCmpLibcallCC(LC.Op, LC.Cond);
488  }
489  }
490  }
491 
492  // These libcalls are not available in 32-bit.
493  setLibcallName(RTLIB::SHL_I128, nullptr);
494  setLibcallName(RTLIB::SRL_I128, nullptr);
495  setLibcallName(RTLIB::SRA_I128, nullptr);
496 
497  // RTLIB
498  if (Subtarget->isAAPCS_ABI() &&
499  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
500  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
501  static const struct {
502  const RTLIB::Libcall Op;
503  const char * const Name;
504  const CallingConv::ID CC;
505  const ISD::CondCode Cond;
506  } LibraryCalls[] = {
507  // Double-precision floating-point arithmetic helper functions
508  // RTABI chapter 4.1.2, Table 2
509  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
510  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
511  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
512  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
513 
514  // Double-precision floating-point comparison helper functions
515  // RTABI chapter 4.1.2, Table 3
516  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
517  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
518  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
519  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
520  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
521  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
522  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
523  { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
524 
525  // Single-precision floating-point arithmetic helper functions
526  // RTABI chapter 4.1.2, Table 4
527  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
528  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
529  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
530  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
531 
532  // Single-precision floating-point comparison helper functions
533  // RTABI chapter 4.1.2, Table 5
534  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
535  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
536  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
537  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
538  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
539  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
540  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
541  { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
542 
543  // Floating-point to integer conversions.
544  // RTABI chapter 4.1.2, Table 6
545  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
546  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
547  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
548  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
549  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
550  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
551  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
552  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
553 
554  // Conversions between floating types.
555  // RTABI chapter 4.1.2, Table 7
556  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
557  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
558  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
559 
560  // Integer to floating-point conversions.
561  // RTABI chapter 4.1.2, Table 8
562  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
563  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
564  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
565  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
566  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
567  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
568  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
569  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
570 
571  // Long long helper functions
572  // RTABI chapter 4.2, Table 9
573  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
574  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
575  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
576  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
577 
578  // Integer division functions
579  // RTABI chapter 4.3.1
580  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
581  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
582  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
583  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
584  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
585  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
586  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
587  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
588  };
589 
590  for (const auto &LC : LibraryCalls) {
591  setLibcallName(LC.Op, LC.Name);
592  setLibcallCallingConv(LC.Op, LC.CC);
593  if (LC.Cond != ISD::SETCC_INVALID)
594  setCmpLibcallCC(LC.Op, LC.Cond);
595  }
596 
597  // EABI dependent RTLIB
 597  if (TM.Options.EABIVersion == EABI::EABI4 ||
 598  TM.Options.EABIVersion == EABI::EABI5) {
600  static const struct {
601  const RTLIB::Libcall Op;
602  const char *const Name;
603  const CallingConv::ID CC;
604  const ISD::CondCode Cond;
605  } MemOpsLibraryCalls[] = {
606  // Memory operations
607  // RTABI chapter 4.3.4
 608  { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
 609  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
610  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
611  };
612 
613  for (const auto &LC : MemOpsLibraryCalls) {
614  setLibcallName(LC.Op, LC.Name);
615  setLibcallCallingConv(LC.Op, LC.CC);
616  if (LC.Cond != ISD::SETCC_INVALID)
617  setCmpLibcallCC(LC.Op, LC.Cond);
618  }
619  }
620  }
621 
622  if (Subtarget->isTargetWindows()) {
623  static const struct {
624  const RTLIB::Libcall Op;
625  const char * const Name;
626  const CallingConv::ID CC;
627  } LibraryCalls[] = {
628  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
629  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
630  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
631  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
632  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
633  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
634  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
635  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
636  };
637 
638  for (const auto &LC : LibraryCalls) {
639  setLibcallName(LC.Op, LC.Name);
640  setLibcallCallingConv(LC.Op, LC.CC);
641  }
642  }
643 
644  // Use divmod compiler-rt calls for iOS 5.0 and later.
645  if (Subtarget->isTargetMachO() &&
646  !(Subtarget->isTargetIOS() &&
647  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
648  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
649  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
650  }
651 
652  // The half <-> float conversion functions are always soft-float on
653  // non-watchos platforms, but are needed for some targets which use a
654  // hard-float calling convention by default.
655  if (!Subtarget->isTargetWatchABI()) {
656  if (Subtarget->isAAPCS_ABI()) {
657  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
658  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
659  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
660  } else {
661  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
662  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
663  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
664  }
665  }
666 
667  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
668  // a __gnu_ prefix (which is the default).
669  if (Subtarget->isTargetAEABI()) {
670  static const struct {
671  const RTLIB::Libcall Op;
672  const char * const Name;
673  const CallingConv::ID CC;
674  } LibraryCalls[] = {
675  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
676  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
677  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
678  };
679 
680  for (const auto &LC : LibraryCalls) {
681  setLibcallName(LC.Op, LC.Name);
682  setLibcallCallingConv(LC.Op, LC.CC);
683  }
684  }
685 
686  if (Subtarget->isThumb1Only())
687  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
688  else
689  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
690 
691  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
692  Subtarget->hasFPRegs()) {
693  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
694  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
695  if (!Subtarget->hasVFP2Base())
696  setAllExpand(MVT::f32);
697  if (!Subtarget->hasFP64())
698  setAllExpand(MVT::f64);
699  }
700 
701  if (Subtarget->hasFullFP16()) {
702  addRegisterClass(MVT::f16, &ARM::HPRRegClass);
706 
709  }
710 
711  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
712  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
713  setTruncStoreAction(VT, InnerVT, Expand);
714  addAllExtLoads(VT, InnerVT, Expand);
715  }
716 
721 
723  }
724 
727 
730 
731  if (Subtarget->hasMVEIntegerOps())
732  addMVEVectorTypes(Subtarget->hasMVEFloatOps());
733 
734  // Combine low-overhead loop intrinsics so that we can lower i1 types.
735  if (Subtarget->hasLOB()) {
738  }
739 
740  if (Subtarget->hasNEON()) {
741  addDRTypeForNEON(MVT::v2f32);
742  addDRTypeForNEON(MVT::v8i8);
743  addDRTypeForNEON(MVT::v4i16);
744  addDRTypeForNEON(MVT::v2i32);
745  addDRTypeForNEON(MVT::v1i64);
746 
747  addQRTypeForNEON(MVT::v4f32);
748  addQRTypeForNEON(MVT::v2f64);
749  addQRTypeForNEON(MVT::v16i8);
750  addQRTypeForNEON(MVT::v8i16);
751  addQRTypeForNEON(MVT::v4i32);
752  addQRTypeForNEON(MVT::v2i64);
753 
754  if (Subtarget->hasFullFP16()) {
755  addQRTypeForNEON(MVT::v8f16);
756  addDRTypeForNEON(MVT::v4f16);
757  }
758  }
759 
760  if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
761  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
762  // none of Neon, MVE or VFP supports any arithmetic operations on it.
766  // FIXME: Code duplication: FDIV and FREM are expanded always, see
767  // ARMTargetLowering::addTypeForNEON method for details.
770  // FIXME: Create unittest.
 771  // In other words, find a case where "copysign" appears in the DAG with vector
772  // operands.
774  // FIXME: Code duplication: SETCC has custom operation action, see
775  // ARMTargetLowering::addTypeForNEON method for details.
777  // FIXME: Create unittest for FNEG and for FABS.
789  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
796  }
797 
798  if (Subtarget->hasNEON()) {
799  // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
800  // supported for v4f32.
815 
816  // Mark v2f32 intrinsics.
831 
832  // Neon does not support some operations on v1i64 and v2i64 types.
834  // Custom handling for some quad-vector types to detect VMULL.
838  // Custom handling for some vector types to avoid expensive expansions
843  // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
 844  // a destination type that is wider than the source, nor does
845  // it have a FP_TO_[SU]INT instruction with a narrower destination than
846  // source.
855 
858 
859  // NEON does not have single instruction CTPOP for vectors with element
860  // types wider than 8-bits. However, custom lowering can leverage the
861  // v8i8/v16i8 vcnt instruction.
868 
871 
872  // NEON does not have single instruction CTTZ for vectors.
877 
882 
887 
892 
893  // NEON only has FMA instructions as of VFP4.
894  if (!Subtarget->hasVFP4Base()) {
897  }
898 
909 
910  // It is legal to extload from v4i8 to v4i16 or v4i32.
912  MVT::v2i32}) {
917  }
918  }
919  }
920 
921  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
929  }
930 
931  if (!Subtarget->hasFP64()) {
932  // When targeting a floating-point unit with only single-precision
933  // operations, f64 is legal for the few double-precision instructions which
 934  // are present. However, no double-precision operations other than moves,
935  // loads and stores are provided by the hardware.
967  }
968 
969  if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
971  if (Subtarget->hasFullFP16())
973  }
974 
975  if (!Subtarget->hasFP16())
977 
978  if (!Subtarget->hasFP64())
980 
982 
983  // ARM does not have floating-point extending loads.
984  for (MVT VT : MVT::fp_valuetypes()) {
987  }
988 
989  // ... or truncating stores
993 
994  // ARM does not have i1 sign extending load.
995  for (MVT VT : MVT::integer_valuetypes())
997 
998  // ARM supports all 4 flavors of integer indexed load / store.
999  if (!Subtarget->isThumb1Only()) {
1000  for (unsigned im = (unsigned)ISD::PRE_INC;
1010  }
1011  } else {
1012  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1015  }
1016 
1021 
1024  if (Subtarget->hasDSP()) {
1029  }
1030  if (Subtarget->hasBaseDSP()) {
1033  }
1034 
1035  // i64 operation support.
1038  if (Subtarget->isThumb1Only()) {
1041  }
1042  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1043  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1045 
1053 
1054  // MVE lowers 64 bit shifts to lsll and lsrl
1055  // assuming that ISD::SRL and SRA of i64 are already marked custom
1056  if (Subtarget->hasMVEIntegerOps())
1058 
1059  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1060  if (Subtarget->isThumb1Only()) {
1064  }
1065 
1066  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1068 
1069  // ARM does not have ROTL.
1071  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1074  }
1077  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1080  }
1081 
1082  // @llvm.readcyclecounter requires the Performance Monitors extension.
1083  // Default to the 0 expansion on unsupported platforms.
1084  // FIXME: Technically there are older ARM CPUs that have
1085  // implementation-specific ways of obtaining this information.
1086  if (Subtarget->hasPerfMon())
1088 
1089  // Only ARMv6 has BSWAP.
1090  if (!Subtarget->hasV6Ops())
1092 
1093  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1094  : Subtarget->hasDivideInARMMode();
1095  if (!hasDivide) {
1096  // These are expanded into libcalls if the cpu doesn't have HW divider.
1099  }
1100 
1101  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1104 
1107  }
1108 
1111 
1112  // Register based DivRem for AEABI (RTABI 4.2)
1113  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1114  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1115  Subtarget->isTargetWindows()) {
1118  HasStandaloneRem = false;
1119 
1120  if (Subtarget->isTargetWindows()) {
1121  const struct {
1122  const RTLIB::Libcall Op;
1123  const char * const Name;
1124  const CallingConv::ID CC;
1125  } LibraryCalls[] = {
1126  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1127  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1128  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1129  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1130 
1131  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1132  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1133  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1134  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1135  };
1136 
1137  for (const auto &LC : LibraryCalls) {
1138  setLibcallName(LC.Op, LC.Name);
1139  setLibcallCallingConv(LC.Op, LC.CC);
1140  }
1141  } else {
1142  const struct {
1143  const RTLIB::Libcall Op;
1144  const char * const Name;
1145  const CallingConv::ID CC;
1146  } LibraryCalls[] = {
1147  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1148  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1149  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1150  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1151 
1152  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1153  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1154  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1155  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1156  };
1157 
1158  for (const auto &LC : LibraryCalls) {
1159  setLibcallName(LC.Op, LC.Name);
1160  setLibcallCallingConv(LC.Op, LC.CC);
1161  }
1162  }
1163 
1168  } else {
1171  }
1172 
1173  if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
1174  for (auto &VT : {MVT::f32, MVT::f64})
1176 
1181 
1184 
1185  // Use the default implementation.
1192 
1193  if (Subtarget->isTargetWindows())
1195  else
1197 
1198  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1199  // the default expansion.
1200  InsertFencesForAtomic = false;
1201  if (Subtarget->hasAnyDataBarrier() &&
1202  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1203  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1204  // to ldrex/strex loops already.
1206  if (!Subtarget->isThumb() || !Subtarget->isMClass())
1208 
1209  // On v8, we have particularly efficient implementations of atomic fences
1210  // if they can be combined with nearby atomic loads and stores.
1211  if (!Subtarget->hasAcquireRelease() ||
1212  getTargetMachine().getOptLevel() == 0) {
1213  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1214  InsertFencesForAtomic = true;
1215  }
1216  } else {
1217  // If there's anything we can use as a barrier, go through custom lowering
1218  // for ATOMIC_FENCE.
1219  // If target has DMB in thumb, Fences can be inserted.
1220  if (Subtarget->hasDataBarrier())
1221  InsertFencesForAtomic = true;
1222 
1224  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1225 
1226  // Set them all for expansion, which will force libcalls.
1239  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1240  // Unordered/Monotonic case.
1241  if (!InsertFencesForAtomic) {
1244  }
1245  }
1246 
1248 
1249  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1250  if (!Subtarget->hasV6Ops()) {
1253  }
1255 
1256  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1257  !Subtarget->isThumb1Only()) {
1258  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1259  // iff target supports vfp2.
1262  }
1263 
1264  // We want to custom lower some of our intrinsics.
1269  if (Subtarget->useSjLjEH())
1270  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1271 
1281  if (Subtarget->hasFullFP16()) {
1285  }
1286 
1288 
1291  if (Subtarget->hasFullFP16())
1296 
1297  // We don't support sin/cos/fmod/copysign/pow
1306  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1307  !Subtarget->isThumb1Only()) {
1310  }
1313 
1314  if (!Subtarget->hasVFP4Base()) {
1317  }
1318 
1319  // Various VFP goodness
1320  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1321  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1322  if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1325  }
1326 
1327  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1328  if (!Subtarget->hasFP16()) {
1331  }
1332  }
1333 
1334  // Use __sincos_stret if available.
1335  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1336  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1339  }
1340 
1341  // FP-ARMv8 implements a lot of rounding-like FP operations.
1342  if (Subtarget->hasFPARMv8Base()) {
1351  if (Subtarget->hasNEON()) {
1356  }
1357 
1358  if (Subtarget->hasFP64()) {
1367  }
1368  }
1369 
1370  // FP16 often need to be promoted to call lib functions
1371  if (Subtarget->hasFullFP16()) {
1384 
1386  }
1387 
1388  if (Subtarget->hasNEON()) {
1389  // vmin and vmax aren't available in a scalar form, so we use
1390  // a NEON instruction with an undef lane instead.
1399 
1400  if (Subtarget->hasFullFP16()) {
1405 
1410  }
1411  }
1412 
1413  // We have target-specific dag combine patterns for the following nodes:
1414  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1421 
1422  if (Subtarget->hasV6Ops())
1424  if (Subtarget->isThumb1Only())
1426 
1428 
1429  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1430  !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1432  else
1434 
1435  //// temporary - rewrite interface to use type
1436  MaxStoresPerMemset = 8;
1438  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1440  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1442 
1443  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1444  // are at least 4 bytes aligned.
1446 
1447  // Prefer likely predicted branches to selects on out-of-order cores.
1448  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1449 
1450  setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1451 
1452  setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1453 
1454  if (Subtarget->isThumb() || Subtarget->isThumb2())
1456 }
1457 
 1458 bool ARMTargetLowering::useSoftFloat() const {
 1459  return Subtarget->useSoftFloat();
1460 }
1461 
1462 // FIXME: It might make sense to define the representative register class as the
1463 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
 1464 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1465 // SPR's representative would be DPR_VFP2. This should work well if register
1466 // pressure tracking were modified such that a register use would increment the
 1467 // pressure of the register class's representative and all of its super
1468 // classes' representatives transitively. We have not implemented this because
1469 // of the difficulty prior to coalescing of modeling operand register classes
1470 // due to the common occurrence of cross class copies and subregister insertions
1471 // and extractions.
1472 std::pair<const TargetRegisterClass *, uint8_t>
 1473 ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
 1474  MVT VT) const {
1475  const TargetRegisterClass *RRC = nullptr;
1476  uint8_t Cost = 1;
1477  switch (VT.SimpleTy) {
 1478  default:
 1479  return TargetLowering::findRepresentativeClass(TRI, VT);
1480  // Use DPR as representative register class for all floating point
 1481  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1482  // the cost is 1 for both f32 and f64.
1483  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1484  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1485  RRC = &ARM::DPRRegClass;
1486  // When NEON is used for SP, only half of the register file is available
1487  // because operations that define both SP and DP results will be constrained
1488  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1489  // coalescing by double-counting the SP regs. See the FIXME above.
1490  if (Subtarget->useNEONForSinglePrecisionFP())
1491  Cost = 2;
1492  break;
1493  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1494  case MVT::v4f32: case MVT::v2f64:
1495  RRC = &ARM::DPRRegClass;
1496  Cost = 2;
1497  break;
1498  case MVT::v4i64:
1499  RRC = &ARM::DPRRegClass;
1500  Cost = 4;
1501  break;
1502  case MVT::v8i64:
1503  RRC = &ARM::DPRRegClass;
1504  Cost = 8;
1505  break;
1506  }
1507  return std::make_pair(RRC, Cost);
1508 }
1509 
1510 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1511  switch ((ARMISD::NodeType)Opcode) {
1512  case ARMISD::FIRST_NUMBER: break;
1513  case ARMISD::Wrapper: return "ARMISD::Wrapper";
1514  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1515  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1516  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1517  case ARMISD::CALL: return "ARMISD::CALL";
1518  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1519  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1520  case ARMISD::BRCOND: return "ARMISD::BRCOND";
1521  case ARMISD::BR_JT: return "ARMISD::BR_JT";
1522  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1523  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1524  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1525  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1526  case ARMISD::CMP: return "ARMISD::CMP";
1527  case ARMISD::CMN: return "ARMISD::CMN";
1528  case ARMISD::CMPZ: return "ARMISD::CMPZ";
1529  case ARMISD::CMPFP: return "ARMISD::CMPFP";
1530  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1531  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1532  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1533 
1534  case ARMISD::CMOV: return "ARMISD::CMOV";
1535  case ARMISD::SUBS: return "ARMISD::SUBS";
1536 
1537  case ARMISD::SSAT: return "ARMISD::SSAT";
1538  case ARMISD::USAT: return "ARMISD::USAT";
1539 
1540  case ARMISD::ASRL: return "ARMISD::ASRL";
1541  case ARMISD::LSRL: return "ARMISD::LSRL";
1542  case ARMISD::LSLL: return "ARMISD::LSLL";
1543 
1544  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1545  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1546  case ARMISD::RRX: return "ARMISD::RRX";
1547 
1548  case ARMISD::ADDC: return "ARMISD::ADDC";
1549  case ARMISD::ADDE: return "ARMISD::ADDE";
1550  case ARMISD::SUBC: return "ARMISD::SUBC";
1551  case ARMISD::SUBE: return "ARMISD::SUBE";
1552  case ARMISD::LSLS: return "ARMISD::LSLS";
1553 
1554  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1555  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1556  case ARMISD::VMOVhr: return "ARMISD::VMOVhr";
1557  case ARMISD::VMOVrh: return "ARMISD::VMOVrh";
1558  case ARMISD::VMOVSR: return "ARMISD::VMOVSR";
1559 
1560  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1561  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1562  case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1563 
1564  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1565 
1566  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1567 
1568  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1569 
1570  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1571 
1572  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1573 
1574  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1575  case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1576 
1577  case ARMISD::PREDICATE_CAST: return "ARMISD::PREDICATE_CAST";
1578  case ARMISD::VCMP: return "ARMISD::VCMP";
1579  case ARMISD::VCMPZ: return "ARMISD::VCMPZ";
1580  case ARMISD::VTST: return "ARMISD::VTST";
1581 
1582  case ARMISD::VSHLs: return "ARMISD::VSHLs";
1583  case ARMISD::VSHLu: return "ARMISD::VSHLu";
1584  case ARMISD::VSHLIMM: return "ARMISD::VSHLIMM";
1585  case ARMISD::VSHRsIMM: return "ARMISD::VSHRsIMM";
1586  case ARMISD::VSHRuIMM: return "ARMISD::VSHRuIMM";
1587  case ARMISD::VRSHRsIMM: return "ARMISD::VRSHRsIMM";
1588  case ARMISD::VRSHRuIMM: return "ARMISD::VRSHRuIMM";
1589  case ARMISD::VRSHRNIMM: return "ARMISD::VRSHRNIMM";
1590  case ARMISD::VQSHLsIMM: return "ARMISD::VQSHLsIMM";
1591  case ARMISD::VQSHLuIMM: return "ARMISD::VQSHLuIMM";
1592  case ARMISD::VQSHLsuIMM: return "ARMISD::VQSHLsuIMM";
1593  case ARMISD::VQSHRNsIMM: return "ARMISD::VQSHRNsIMM";
1594  case ARMISD::VQSHRNuIMM: return "ARMISD::VQSHRNuIMM";
1595  case ARMISD::VQSHRNsuIMM: return "ARMISD::VQSHRNsuIMM";
1596  case ARMISD::VQRSHRNsIMM: return "ARMISD::VQRSHRNsIMM";
1597  case ARMISD::VQRSHRNuIMM: return "ARMISD::VQRSHRNuIMM";
1598  case ARMISD::VQRSHRNsuIMM: return "ARMISD::VQRSHRNsuIMM";
1599  case ARMISD::VSLIIMM: return "ARMISD::VSLIIMM";
1600  case ARMISD::VSRIIMM: return "ARMISD::VSRIIMM";
1601  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1602  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1603  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1604  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1605  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1606  case ARMISD::VDUP: return "ARMISD::VDUP";
1607  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1608  case ARMISD::VEXT: return "ARMISD::VEXT";
1609  case ARMISD::VREV64: return "ARMISD::VREV64";
1610  case ARMISD::VREV32: return "ARMISD::VREV32";
1611  case ARMISD::VREV16: return "ARMISD::VREV16";
1612  case ARMISD::VZIP: return "ARMISD::VZIP";
1613  case ARMISD::VUZP: return "ARMISD::VUZP";
1614  case ARMISD::VTRN: return "ARMISD::VTRN";
1615  case ARMISD::VTBL1: return "ARMISD::VTBL1";
1616  case ARMISD::VTBL2: return "ARMISD::VTBL2";
1617  case ARMISD::VMOVN: return "ARMISD::VMOVN";
1618  case ARMISD::VMULLs: return "ARMISD::VMULLs";
1619  case ARMISD::VMULLu: return "ARMISD::VMULLu";
1620  case ARMISD::UMAAL: return "ARMISD::UMAAL";
1621  case ARMISD::UMLAL: return "ARMISD::UMLAL";
1622  case ARMISD::SMLAL: return "ARMISD::SMLAL";
1623  case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1624  case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1625  case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1626  case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1627  case ARMISD::SMULWB: return "ARMISD::SMULWB";
1628  case ARMISD::SMULWT: return "ARMISD::SMULWT";
1629  case ARMISD::SMLALD: return "ARMISD::SMLALD";
1630  case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1631  case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1632  case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1633  case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
1634  case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
1635  case ARMISD::QADD16b: return "ARMISD::QADD16b";
1636  case ARMISD::QSUB16b: return "ARMISD::QSUB16b";
1637  case ARMISD::QADD8b: return "ARMISD::QADD8b";
1638  case ARMISD::QSUB8b: return "ARMISD::QSUB8b";
1639  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1640  case ARMISD::BFI: return "ARMISD::BFI";
1641  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1642  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1643  case ARMISD::VBSL: return "ARMISD::VBSL";
1644  case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1645  case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1646  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1647  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1648  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1649  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1650  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1651  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1652  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1653  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1654  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1655  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1656  case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1657  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1658  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1659  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1660  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1661  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1662  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1663  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1664  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1665  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1666  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1667  case ARMISD::WLS: return "ARMISD::WLS";
1668  case ARMISD::LE: return "ARMISD::LE";
1669  case ARMISD::LOOP_DEC: return "ARMISD::LOOP_DEC";
1670  case ARMISD::CSINV: return "ARMISD::CSINV";
1671  case ARMISD::CSNEG: return "ARMISD::CSNEG";
1672  case ARMISD::CSINC: return "ARMISD::CSINC";
1673  }
1674  return nullptr;
1675 }
1676 
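// With MVE, vector comparisons produce an i1 predicate vector held in VCCR
// rather than a mask of the same width as the compared operands.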
 1677 EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
 1678  EVT VT) const {
1679  if (!VT.isVector())
1680  return getPointerTy(DL);
1681 
1682  // MVE has a predicate register.
1683  if (Subtarget->hasMVEIntegerOps() &&
1684  (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8))
1687 }
1688 
1689 /// getRegClassFor - Return the register class that should be used for the
1690 /// specified value type.
1691 const TargetRegisterClass *
1692 ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1693  (void)isDivergent;
1694  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1695  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1696  // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1697  // MVE Q registers.
1698  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1699  if (VT == MVT::v4i64)
1700  return &ARM::QQPRRegClass;
1701  if (VT == MVT::v8i64)
1702  return &ARM::QQQQPRRegClass;
1703  }
1704  return TargetLowering::getRegClassFor(VT);
1705 }
1706 
 1707 // memcpy and other memory intrinsics typically try to use LDM/STM if the
1708 // source/dest is aligned and the copy size is large enough. We therefore want
1709 // to align such objects passed to memory intrinsics.
 1710 bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
 1711  unsigned &PrefAlign) const {
1712  if (!isa<MemIntrinsic>(CI))
1713  return false;
1714  MinSize = 8;
1715  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1716  // cycle faster than 4-byte aligned LDM.
1717  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1718  return true;
1719 }
1720 
1721 // Create a fast isel object.
1722 FastISel *
 1723 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
 1724  const TargetLibraryInfo *libInfo) const {
1725  return ARM::createFastISel(funcInfo, libInfo);
1726 }
1727 
 1728 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
 1729  unsigned NumVals = N->getNumValues();
1730  if (!NumVals)
1731  return Sched::RegPressure;
1732 
1733  for (unsigned i = 0; i != NumVals; ++i) {
1734  EVT VT = N->getValueType(i);
1735  if (VT == MVT::Glue || VT == MVT::Other)
1736  continue;
1737  if (VT.isFloatingPoint() || VT.isVector())
1738  return Sched::ILP;
1739  }
1740 
1741  if (!N->isMachineOpcode())
1742  return Sched::RegPressure;
1743 
 1744  // Loads are scheduled for latency even if the instruction itinerary
1745  // is not available.
1746  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1747  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1748 
1749  if (MCID.getNumDefs() == 0)
1750  return Sched::RegPressure;
1751  if (!Itins->isEmpty() &&
1752  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1753  return Sched::ILP;
1754 
1755  return Sched::RegPressure;
1756 }
1757 
1758 //===----------------------------------------------------------------------===//
1759 // Lowering Code
1760 //===----------------------------------------------------------------------===//
1761 
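// The next few helpers recognize shift-by-16 patterns; they are used below when
// matching operations that act on the top or bottom half of a 32-bit value.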
1762 static bool isSRL16(const SDValue &Op) {
1763  if (Op.getOpcode() != ISD::SRL)
1764  return false;
1765  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1766  return Const->getZExtValue() == 16;
1767  return false;
1768 }
1769 
1770 static bool isSRA16(const SDValue &Op) {
1771  if (Op.getOpcode() != ISD::SRA)
1772  return false;
1773  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1774  return Const->getZExtValue() == 16;
1775  return false;
1776 }
1777 
1778 static bool isSHL16(const SDValue &Op) {
1779  if (Op.getOpcode() != ISD::SHL)
1780  return false;
1781  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1782  return Const->getZExtValue() == 16;
1783  return false;
1784 }
1785 
1786 // Check for a signed 16-bit value. We special case SRA because it makes it
 1787 // simpler when also looking for SRAs that aren't sign extending a
1788 // smaller value. Without the check, we'd need to take extra care with
1789 // checking order for some operations.
1790 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1791  if (isSRA16(Op))
1792  return isSHL16(Op.getOperand(0));
1793  return DAG.ComputeNumSignBits(Op) == 17;
1794 }
1795 
1796 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
 1797 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
 1798  switch (CC) {
1799  default: llvm_unreachable("Unknown condition code!");
1800  case ISD::SETNE: return ARMCC::NE;
1801  case ISD::SETEQ: return ARMCC::EQ;
1802  case ISD::SETGT: return ARMCC::GT;
1803  case ISD::SETGE: return ARMCC::GE;
1804  case ISD::SETLT: return ARMCC::LT;
1805  case ISD::SETLE: return ARMCC::LE;
1806  case ISD::SETUGT: return ARMCC::HI;
1807  case ISD::SETUGE: return ARMCC::HS;
1808  case ISD::SETULT: return ARMCC::LO;
1809  case ISD::SETULE: return ARMCC::LS;
1810  }
1811 }
1812 
1813 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
 1814 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
 1815  ARMCC::CondCodes &CondCode2) {
1816  CondCode2 = ARMCC::AL;
1817  switch (CC) {
1818  default: llvm_unreachable("Unknown FP condition!");
1819  case ISD::SETEQ:
1820  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
1821  case ISD::SETGT:
1822  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1823  case ISD::SETGE:
1824  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1825  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1826  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1827  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
1828  case ISD::SETO: CondCode = ARMCC::VC; break;
1829  case ISD::SETUO: CondCode = ARMCC::VS; break;
1830  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
1831  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1832  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1833  case ISD::SETLT:
1834  case ISD::SETULT: CondCode = ARMCC::LT; break;
1835  case ISD::SETLE:
1836  case ISD::SETULE: CondCode = ARMCC::LE; break;
1837  case ISD::SETNE:
1838  case ISD::SETUNE: CondCode = ARMCC::NE; break;
1839  }
1840 }
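// Editor's sketch (not in the original source): FP conditions with no single
// ARM encoding come back as a pair. ISD::SETONE ("ordered and not equal"),
// for example, yields CondCode = MI and CondCode2 = GT, and the caller emits
// two predicated instructions after the VFP compare, roughly:
//   vcmp.f32 s0, s1
//   vmrs  APSR_nzcv, fpscr
//   movmi r0, #1        @ taken when a < b (ordered)
//   movgt r0, #1        @ taken when a > b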
1841 
1842 //===----------------------------------------------------------------------===//
1843 // Calling Convention Implementation
1844 //===----------------------------------------------------------------------===//
1845 
1846 /// getEffectiveCallingConv - Get the effective calling convention, taking into
1847 /// account presence of floating point hardware and calling convention
1848 /// limitations, such as support for variadic functions.
1849 CallingConv::ID
1850 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1851  bool isVarArg) const {
1852  switch (CC) {
1853  default:
1854  report_fatal_error("Unsupported calling convention");
1855  case CallingConv::ARM_AAPCS:
1856  case CallingConv::ARM_APCS:
1857  case CallingConv::GHC:
1858  return CC;
1859  case CallingConv::PreserveMost:
1860  return CallingConv::PreserveMost;
1861  case CallingConv::ARM_AAPCS_VFP:
1862  case CallingConv::Swift:
1863  return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
1864  case CallingConv::C:
1865  if (!Subtarget->isAAPCS_ABI())
1866  return CallingConv::ARM_APCS;
1867  else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
1868  getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
1869  !isVarArg)
1870  return CallingConv::ARM_AAPCS_VFP;
1871  else
1872  return CallingConv::ARM_AAPCS;
1873  case CallingConv::Fast:
1874  case CallingConv::CXX_FAST_TLS:
1875  if (!Subtarget->isAAPCS_ABI()) {
1876  if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
1877  return CallingConv::Fast;
1878  return CallingConv::ARM_APCS;
1879  } else if (Subtarget->hasVFP2Base() &&
1880  !Subtarget->isThumb1Only() && !isVarArg)
1881  return CallingConv::ARM_AAPCS_VFP;
1882  else
1883  return CallingConv::ARM_AAPCS;
1884  }
1885 }
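// Editor's note: for example, a plain C call on a hard-float AAPCS target
// with VFP2 and not Thumb1-only resolves to ARM_AAPCS_VFP, so FP arguments
// travel in s/d registers, while the same call declared variadic falls back
// to ARM_AAPCS and passes them in r0-r3 / on the stack.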
1886 
1887 CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1888  bool isVarArg) const {
1889  return CCAssignFnForNode(CC, false, isVarArg);
1890 }
1891 
1892 CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
1893  bool isVarArg) const {
1894  return CCAssignFnForNode(CC, true, isVarArg);
1895 }
1896 
1897 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1898 /// CallingConvention.
1899 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1900  bool Return,
1901  bool isVarArg) const {
1902  switch (getEffectiveCallingConv(CC, isVarArg)) {
1903  default:
1904  report_fatal_error("Unsupported calling convention");
1905  case CallingConv::ARM_APCS:
1906  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1907  case CallingConv::ARM_AAPCS:
1908  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1909  case CallingConv::ARM_AAPCS_VFP:
1910  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1911  case CallingConv::Fast:
1912  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1913  case CallingConv::GHC:
1914  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1915  case CallingConv::PreserveMost:
1916  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1917  }
1918 }
1919 
1920 /// LowerCallResult - Lower the result values of a call into the
1921 /// appropriate copies out of appropriate physical registers.
1922 SDValue ARMTargetLowering::LowerCallResult(
1923  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1924  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1925  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1926  SDValue ThisVal) const {
1927  // Assign locations to each value returned by this call.
1928  SmallVector<CCValAssign, 16> RVLocs;
1929  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1930  *DAG.getContext());
1931  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1932 
1933  // Copy all of the result registers out of their specified physreg.
1934  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1935  CCValAssign VA = RVLocs[i];
1936 
1937  // Pass 'this' value directly from the argument to return value, to avoid
1938  // reg unit interference
1939  if (i == 0 && isThisReturn) {
1940  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1941  "unexpected return calling convention register assignment");
1942  InVals.push_back(ThisVal);
1943  continue;
1944  }
1945 
1946  SDValue Val;
1947  if (VA.needsCustom()) {
1948  // Handle f64 or half of a v2f64.
1949  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1950  InFlag);
1951  Chain = Lo.getValue(1);
1952  InFlag = Lo.getValue(2);
1953  VA = RVLocs[++i]; // skip ahead to next loc
1954  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1955  InFlag);
1956  Chain = Hi.getValue(1);
1957  InFlag = Hi.getValue(2);
1958  if (!Subtarget->isLittle())
1959  std::swap (Lo, Hi);
1960  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1961 
1962  if (VA.getLocVT() == MVT::v2f64) {
1963  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1964  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1965  DAG.getConstant(0, dl, MVT::i32));
1966 
1967  VA = RVLocs[++i]; // skip ahead to next loc
1968  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1969  Chain = Lo.getValue(1);
1970  InFlag = Lo.getValue(2);
1971  VA = RVLocs[++i]; // skip ahead to next loc
1972  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1973  Chain = Hi.getValue(1);
1974  InFlag = Hi.getValue(2);
1975  if (!Subtarget->isLittle())
1976  std::swap (Lo, Hi);
1977  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1978  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1979  DAG.getConstant(1, dl, MVT::i32));
1980  }
1981  } else {
1982  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1983  InFlag);
1984  Chain = Val.getValue(1);
1985  InFlag = Val.getValue(2);
1986  }
1987 
1988  switch (VA.getLocInfo()) {
1989  default: llvm_unreachable("Unknown loc info!");
1990  case CCValAssign::Full: break;
1991  case CCValAssign::BCvt:
1992  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1993  break;
1994  }
1995 
1996  InVals.push_back(Val);
1997  }
1998 
1999  return Chain;
2000 }
2001 
2002 /// LowerMemOpCallTo - Store the argument to the stack.
2003 SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
2004  SDValue Arg, const SDLoc &dl,
2005  SelectionDAG &DAG,
2006  const CCValAssign &VA,
2007  ISD::ArgFlagsTy Flags) const {
2008  unsigned LocMemOffset = VA.getLocMemOffset();
2009  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
2010  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2011  StackPtr, PtrOff);
2012  return DAG.getStore(
2013  Chain, dl, Arg, PtrOff,
2014  MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
2015 }
2016 
2017 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2018  SDValue Chain, SDValue &Arg,
2019  RegsToPassVector &RegsToPass,
2020  CCValAssign &VA, CCValAssign &NextVA,
2021  SDValue &StackPtr,
2022  SmallVectorImpl<SDValue> &MemOpChains,
2023  ISD::ArgFlagsTy Flags) const {
2024  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2025  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2026  unsigned id = Subtarget->isLittle() ? 0 : 1;
2027  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2028 
2029  if (NextVA.isRegLoc())
2030  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2031  else {
2032  assert(NextVA.isMemLoc());
2033  if (!StackPtr.getNode())
2034  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2035  getPointerTy(DAG.getDataLayout()));
2036 
2037  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
2038  dl, DAG, NextVA,
2039  Flags));
2040  }
2041 }
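// Editor's sketch: for a soft-float f64 argument assigned to r0/r1 this emits
// roughly
//   t1, t2: i32, i32 = ARMISD::VMOVRRD <f64 Arg>
//   copy t1 -> R0, copy t2 -> R1      (halves swapped on big-endian)
// and, when only one GPR is left for the pair, the other half is stored to
// the outgoing argument area via LowerMemOpCallTo instead.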
2042 
2043 /// LowerCall - Lowering a call into a callseq_start <-
2044 /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
2045 /// nodes.
2046 SDValue
2047 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2048  SmallVectorImpl<SDValue> &InVals) const {
2049  SelectionDAG &DAG = CLI.DAG;
2050  SDLoc &dl = CLI.DL;
2051  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2052  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2053  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2054  SDValue Chain = CLI.Chain;
2055  SDValue Callee = CLI.Callee;
2056  bool &isTailCall = CLI.IsTailCall;
2057  CallingConv::ID CallConv = CLI.CallConv;
2058  bool doesNotRet = CLI.DoesNotReturn;
2059  bool isVarArg = CLI.IsVarArg;
2060 
2061  MachineFunction &MF = DAG.getMachineFunction();
2062  MachineFunction::CallSiteInfo CSInfo;
2063  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2064  bool isThisReturn = false;
2065  auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
2066  bool PreferIndirect = false;
2067 
2068  // Disable tail calls if they're not supported.
2069  if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
2070  isTailCall = false;
2071 
2072  if (isa<GlobalAddressSDNode>(Callee)) {
2073  // If we're optimizing for minimum size and the function is called three or
2074  // more times in this block, we can improve codesize by calling indirectly
2075  // as BLXr has a 16-bit encoding.
2076  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2077  if (CLI.CS) {
2078  auto *BB = CLI.CS.getParent();
2079  PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2080  count_if(GV->users(), [&BB](const User *U) {
2081  return isa<Instruction>(U) &&
2082  cast<Instruction>(U)->getParent() == BB;
2083  }) > 2;
2084  }
2085  }
2086  if (isTailCall) {
2087  // Check if it's really possible to do a tail call.
2088  isTailCall = IsEligibleForTailCallOptimization(
2089  Callee, CallConv, isVarArg, isStructRet,
2090  MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2091  PreferIndirect);
2092  if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
2093  report_fatal_error("failed to perform tail call elimination on a call "
2094  "site marked musttail");
2095  // We don't support GuaranteedTailCallOpt for ARM, only automatically
2096  // detected sibcalls.
2097  if (isTailCall)
2098  ++NumTailCalls;
2099  }
2100 
2101  // Analyze operands of the call, assigning locations to each operand.
2102  SmallVector<CCValAssign, 16> ArgLocs;
2103  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2104  *DAG.getContext());
2105  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2106 
2107  // Get a count of how many bytes are to be pushed on the stack.
2108  unsigned NumBytes = CCInfo.getNextStackOffset();
2109 
2110  if (isTailCall) {
2111  // For tail calls, memory operands are available in our caller's stack.
2112  NumBytes = 0;
2113  } else {
2114  // Adjust the stack pointer for the new arguments...
2115  // These operations are automatically eliminated by the prolog/epilog pass
2116  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
2117  }
2118 
2119  SDValue StackPtr =
2120  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2121 
2122  RegsToPassVector RegsToPass;
2123  SmallVector<SDValue, 8> MemOpChains;
2124 
2125  // Walk the register/memloc assignments, inserting copies/loads. In the case
2126  // of tail call optimization, arguments are handled later.
2127  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2128  i != e;
2129  ++i, ++realArgIdx) {
2130  CCValAssign &VA = ArgLocs[i];
2131  SDValue Arg = OutVals[realArgIdx];
2132  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2133  bool isByVal = Flags.isByVal();
2134 
2135  // Promote the value if needed.
2136  switch (VA.getLocInfo()) {
2137  default: llvm_unreachable("Unknown loc info!");
2138  case CCValAssign::Full: break;
2139  case CCValAssign::SExt:
2140  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2141  break;
2142  case CCValAssign::ZExt:
2143  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2144  break;
2145  case CCValAssign::AExt:
2146  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2147  break;
2148  case CCValAssign::BCvt:
2149  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2150  break;
2151  }
2152 
2153  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2154  if (VA.needsCustom()) {
2155  if (VA.getLocVT() == MVT::v2f64) {
2156  SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2157  DAG.getConstant(0, dl, MVT::i32));
2158  SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2159  DAG.getConstant(1, dl, MVT::i32));
2160 
2161  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
2162  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
2163 
2164  VA = ArgLocs[++i]; // skip ahead to next loc
2165  if (VA.isRegLoc()) {
2166  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
2167  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
2168  } else {
2169  assert(VA.isMemLoc());
2170 
2171  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
2172  dl, DAG, VA, Flags));
2173  }
2174  } else {
2175  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2176  StackPtr, MemOpChains, Flags);
2177  }
2178  } else if (VA.isRegLoc()) {
2179  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2180  Outs[0].VT == MVT::i32) {
2181  assert(VA.getLocVT() == MVT::i32 &&
2182  "unexpected calling convention register assignment");
2183  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2184  "unexpected use of 'returned'");
2185  isThisReturn = true;
2186  }
2187  const TargetOptions &Options = DAG.getTarget().Options;
2188  if (Options.EnableDebugEntryValues)
2189  CSInfo.emplace_back(VA.getLocReg(), i);
2190  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2191  } else if (isByVal) {
2192  assert(VA.isMemLoc());
2193  unsigned offset = 0;
2194 
2195  // True if this byval aggregate will be split between registers
2196  // and memory.
2197  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2198  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2199 
2200  if (CurByValIdx < ByValArgsCount) {
2201 
2202  unsigned RegBegin, RegEnd;
2203  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2204 
2205  EVT PtrVT =
2206  DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2207  unsigned int i, j;
2208  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2209  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2210  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2211  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
2212  MachinePointerInfo(),
2213  DAG.InferPtrAlignment(AddArg));
2214  MemOpChains.push_back(Load.getValue(1));
2215  RegsToPass.push_back(std::make_pair(j, Load));
2216  }
2217 
2218  // If the parameter size exceeds the register area, the "offset" value
2219  // helps us calculate the stack slot for the remaining part properly.
2220  offset = RegEnd - RegBegin;
2221 
2222  CCInfo.nextInRegsParam();
2223  }
2224 
2225  if (Flags.getByValSize() > 4*offset) {
2226  auto PtrVT = getPointerTy(DAG.getDataLayout());
2227  unsigned LocMemOffset = VA.getLocMemOffset();
2228  SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
2229  SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
2230  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2231  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2232  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2233  MVT::i32);
2234  SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
2235  MVT::i32);
2236 
2237  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2238  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2239  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2240  Ops));
2241  }
2242  } else if (!isTailCall) {
2243  assert(VA.isMemLoc());
2244 
2245  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2246  dl, DAG, VA, Flags));
2247  }
2248  }
2249 
2250  if (!MemOpChains.empty())
2251  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2252 
2253  // Build a sequence of copy-to-reg nodes chained together with token chain
2254  // and flag operands which copy the outgoing args into the appropriate regs.
2255  SDValue InFlag;
2256  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2257  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2258  RegsToPass[i].second, InFlag);
2259  InFlag = Chain.getValue(1);
2260  }
2261 
2262  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2263  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2264  // node so that legalize doesn't hack it.
2265  bool isDirect = false;
2266 
2267  const TargetMachine &TM = getTargetMachine();
2268  const Module *Mod = MF.getFunction().getParent();
2269  const GlobalValue *GV = nullptr;
2270  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2271  GV = G->getGlobal();
2272  bool isStub =
2273  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2274 
2275  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2276  bool isLocalARMFunc = false;
2277  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2278  auto PtrVt = getPointerTy(DAG.getDataLayout());
2279 
2280  if (Subtarget->genLongCalls()) {
2281  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2282  "long-calls codegen is not position independent!");
2283  // Handle a global address or an external symbol. If it's not one of
2284  // those, the target's already in a register, so we don't need to do
2285  // anything extra.
2286  if (isa<GlobalAddressSDNode>(Callee)) {
2287  // Create a constant pool entry for the callee address
2288  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2289  ARMConstantPoolValue *CPV =
2290  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2291 
2292  // Get the address of the callee into a register
2293  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2294  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2295  Callee = DAG.getLoad(
2296  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2297  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2298  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2299  const char *Sym = S->getSymbol();
2300 
2301  // Create a constant pool entry for the callee address
2302  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2303  ARMConstantPoolValue *CPV =
2304  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2305  ARMPCLabelIndex, 0);
2306  // Get the address of the callee into a register
2307  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2308  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2309  Callee = DAG.getLoad(
2310  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2311  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2312  }
2313  } else if (isa<GlobalAddressSDNode>(Callee)) {
2314  if (!PreferIndirect) {
2315  isDirect = true;
2316  bool isDef = GV->isStrongDefinitionForLinker();
2317 
2318  // ARM call to a local ARM function is predicable.
2319  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2320  // tBX takes a register source operand.
2321  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2322  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2323  Callee = DAG.getNode(
2324  ARMISD::WrapperPIC, dl, PtrVt,
2325  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2326  Callee = DAG.getLoad(
2327  PtrVt, dl, DAG.getEntryNode(), Callee,
2328  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2329  /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2330  MachineMemOperand::MOInvariant);
2331  } else if (Subtarget->isTargetCOFF()) {
2332  assert(Subtarget->isTargetWindows() &&
2333  "Windows is the only supported COFF target");
2334  unsigned TargetFlags = GV->hasDLLImportStorageClass()
2335  ? ARMII::MO_DLLIMPORT
2336  : ARMII::MO_NO_FLAG;
2337  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
2338  TargetFlags);
2339  if (GV->hasDLLImportStorageClass())
2340  Callee =
2341  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2342  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2343  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2344  } else {
2345  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2346  }
2347  }
2348  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2349  isDirect = true;
2350  // tBX takes a register source operand.
2351  const char *Sym = S->getSymbol();
2352  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2353  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2354  ARMConstantPoolValue *CPV =
2355  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2356  ARMPCLabelIndex, 4);
2357  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2358  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2359  Callee = DAG.getLoad(
2360  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2361  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2362  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2363  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2364  } else {
2365  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2366  }
2367  }
2368 
2369  // FIXME: handle tail calls differently.
2370  unsigned CallOpc;
2371  if (Subtarget->isThumb()) {
2372  if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2373  CallOpc = ARMISD::CALL_NOLINK;
2374  else
2375  CallOpc = ARMISD::CALL;
2376  } else {
2377  if (!isDirect && !Subtarget->hasV5TOps())
2378  CallOpc = ARMISD::CALL_NOLINK;
2379  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2380  // Emit regular call when code size is the priority
2381  !Subtarget->hasMinSize())
2382  // "mov lr, pc; b _foo" to avoid confusing the return stack predictor (RSP)
2383  CallOpc = ARMISD::CALL_NOLINK;
2384  else
2385  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2386  }
2387 
2388  std::vector<SDValue> Ops;
2389  Ops.push_back(Chain);
2390  Ops.push_back(Callee);
2391 
2392  // Add argument registers to the end of the list so that they are known live
2393  // into the call.
2394  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2395  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2396  RegsToPass[i].second.getValueType()));
2397 
2398  // Add a register mask operand representing the call-preserved registers.
2399  if (!isTailCall) {
2400  const uint32_t *Mask;
2401  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2402  if (isThisReturn) {
2403  // For 'this' returns, use the R0-preserving mask if applicable
2404  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2405  if (!Mask) {
2406  // Set isThisReturn to false if the calling convention is not one that
2407  // allows 'returned' to be modeled in this way, so LowerCallResult does
2408  // not try to pass 'this' straight through
2409  isThisReturn = false;
2410  Mask = ARI->getCallPreservedMask(MF, CallConv);
2411  }
2412  } else
2413  Mask = ARI->getCallPreservedMask(MF, CallConv);
2414 
2415  assert(Mask && "Missing call preserved mask for calling convention");
2416  Ops.push_back(DAG.getRegisterMask(Mask));
2417  }
2418 
2419  if (InFlag.getNode())
2420  Ops.push_back(InFlag);
2421 
2422  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2423  if (isTailCall) {
2424  MF.getFrameInfo().setHasTailCall();
2425  SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2426  DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2427  return Ret;
2428  }
2429 
2430  // Returns a chain and a flag for retval copy to use.
2431  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2432  InFlag = Chain.getValue(1);
2433  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2434 
2435  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2436  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2437  if (!Ins.empty())
2438  InFlag = Chain.getValue(1);
2439 
2440  // Handle result values, copying them out of physregs into vregs that we
2441  // return.
2442  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2443  InVals, isThisReturn,
2444  isThisReturn ? OutVals[0] : SDValue());
2445 }
2446 
2447 /// HandleByVal - Every parameter *after* a byval parameter is passed
2448 /// on the stack. Remember the next parameter register to allocate,
2449 /// and then confiscate the rest of the parameter registers to ensure
2450 /// this.
2451 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2452  unsigned Align) const {
2453  // Byval (as with any stack) slots are always at least 4 byte aligned.
2454  Align = std::max(Align, 4U);
2455 
2456  unsigned Reg = State->AllocateReg(GPRArgRegs);
2457  if (!Reg)
2458  return;
2459 
2460  unsigned AlignInRegs = Align / 4;
2461  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2462  for (unsigned i = 0; i < Waste; ++i)
2463  Reg = State->AllocateReg(GPRArgRegs);
2464 
2465  if (!Reg)
2466  return;
2467 
2468  unsigned Excess = 4 * (ARM::R4 - Reg);
2469 
2470  // Special case when NSAA != SP and the parameter size is greater than the
2471  // size of all remaining GPR regs. In that case we can't split the parameter;
2472  // we must send it to the stack. We also must set NCRN to R4, so we waste all
2473  // remaining registers.
2474  const unsigned NSAAOffset = State->getNextStackOffset();
2475  if (NSAAOffset != 0 && Size > Excess) {
2476  while (State->AllocateReg(GPRArgRegs))
2477  ;
2478  return;
2479  }
2480 
2481  // First register for byval parameter is the first register that wasn't
2482  // allocated before this method call, so it would be "reg".
2483  // If parameter is small enough to be saved in range [reg, r4), then
2484  // the end (first after last) register would be reg + param-size-in-regs,
2485  // else the parameter would be split between registers and stack, and the
2486  // end register would be r4 in this case.
2487  unsigned ByValRegBegin = Reg;
2488  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2489  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2490  // Note, the first register was already allocated at the beginning of the
2491  // function; allocate the remaining number of registers we need.
2492  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2493  State->AllocateReg(GPRArgRegs);
2494  // A byval parameter that is split between registers and memory needs its
2495  // size truncated here.
2496  // In the case where the entire structure fits in registers, we set the
2497  // size in memory to zero.
2498  Size = std::max<int>(Size - Excess, 0);
2499 }
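// Editor's worked example: a 16-byte byval with 8-byte alignment arriving
// when r1 is the next free GPR wastes r1 for alignment, occupies r2-r3
// (Excess = 8 bytes, recorded as the r2..r4 in-regs range), and leaves
// Size = 8 bytes to be passed on the stack after it.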
2500 
2501 /// MatchingStackOffset - Return true if the given stack call argument is
2502 /// already available in the same position (relatively) of the caller's
2503 /// incoming argument stack.
2504 static
2505 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2506  MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2507  const TargetInstrInfo *TII) {
2508  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2509  int FI = std::numeric_limits<int>::max();
2510  if (Arg.getOpcode() == ISD::CopyFromReg) {
2511  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2512  if (!Register::isVirtualRegister(VR))
2513  return false;
2514  MachineInstr *Def = MRI->getVRegDef(VR);
2515  if (!Def)
2516  return false;
2517  if (!Flags.isByVal()) {
2518  if (!TII->isLoadFromStackSlot(*Def, FI))
2519  return false;
2520  } else {
2521  return false;
2522  }
2523  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2524  if (Flags.isByVal())
2525  // ByVal argument is passed in as a pointer but it's now being
2526  // dereferenced. e.g.
2527  // define @foo(%struct.X* %A) {
2528  // tail call @bar(%struct.X* byval %A)
2529  // }
2530  return false;
2531  SDValue Ptr = Ld->getBasePtr();
2532  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2533  if (!FINode)
2534  return false;
2535  FI = FINode->getIndex();
2536  } else
2537  return false;
2538 
2539  assert(FI != std::numeric_limits<int>::max());
2540  if (!MFI.isFixedObjectIndex(FI))
2541  return false;
2542  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2543 }
2544 
2545 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2546 /// for tail call optimization. Targets which want to do tail call
2547 /// optimization should implement this function.
2548 bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2549  SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2550  bool isCalleeStructRet, bool isCallerStructRet,
2551  const SmallVectorImpl<ISD::OutputArg> &Outs,
2552  const SmallVectorImpl<SDValue> &OutVals,
2553  const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
2554  const bool isIndirect) const {
2555  MachineFunction &MF = DAG.getMachineFunction();
2556  const Function &CallerF = MF.getFunction();
2557  CallingConv::ID CallerCC = CallerF.getCallingConv();
2558 
2559  assert(Subtarget->supportsTailCall());
2560 
2561  // Indirect tail calls cannot be optimized for Thumb1 if the args
2562  // to the call take up r0-r3. The reason is that there are no legal registers
2563  // left to hold the pointer to the function to be called.
2564  if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2565  (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
2566  return false;
2567 
2568  // Look for obvious safe cases to perform tail call optimization that do not
2569  // require ABI changes. This is what gcc calls sibcall.
2570 
2571  // Exception-handling functions need a special set of instructions to indicate
2572  // a return to the hardware. Tail-calling another function would probably
2573  // break this.
2574  if (CallerF.hasFnAttribute("interrupt"))
2575  return false;
2576 
2577  // Also avoid sibcall optimization if either caller or callee uses struct
2578  // return semantics.
2579  if (isCalleeStructRet || isCallerStructRet)
2580  return false;
2581 
2582  // Externally-defined functions with weak linkage should not be
2583  // tail-called on ARM when the OS does not support dynamic
2584  // pre-emption of symbols, as the AAELF spec requires normal calls
2585  // to undefined weak functions to be replaced with a NOP or jump to the
2586  // next instruction. The behaviour of branch instructions in this
2587  // situation (as used for tail calls) is implementation-defined, so we
2588  // cannot rely on the linker replacing the tail call with a return.
2589  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2590  const GlobalValue *GV = G->getGlobal();
2591  const Triple &TT = getTargetMachine().getTargetTriple();
2592  if (GV->hasExternalWeakLinkage() &&
2593  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2594  return false;
2595  }
2596 
2597  // Check that the call results are passed in the same way.
2598  LLVMContext &C = *DAG.getContext();
2599  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2600  CCAssignFnForReturn(CalleeCC, isVarArg),
2601  CCAssignFnForReturn(CallerCC, isVarArg)))
2602  return false;
2603  // The callee has to preserve all registers the caller needs to preserve.
2604  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2605  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2606  if (CalleeCC != CallerCC) {
2607  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2608  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2609  return false;
2610  }
2611 
2612  // If Caller's vararg or byval argument has been split between registers and
2613  // stack, do not perform tail call, since part of the argument is in caller's
2614  // local frame.
2615  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2616  if (AFI_Caller->getArgRegsSaveSize())
2617  return false;
2618 
2619  // If the callee takes no arguments then go on to check the results of the
2620  // call.
2621  if (!Outs.empty()) {
2622  // Check if stack adjustment is needed. For now, do not do this if any
2623  // argument is passed on the stack.
2624  SmallVector<CCValAssign, 16> ArgLocs;
2625  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2626  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2627  if (CCInfo.getNextStackOffset()) {
2628  // Check if the arguments are already laid out in the right way as
2629  // the caller's fixed stack objects.
2630  MachineFrameInfo &MFI = MF.getFrameInfo();
2631  const MachineRegisterInfo *MRI = &MF.getRegInfo();
2632  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2633  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2634  i != e;
2635  ++i, ++realArgIdx) {
2636  CCValAssign &VA = ArgLocs[i];
2637  EVT RegVT = VA.getLocVT();
2638  SDValue Arg = OutVals[realArgIdx];
2639  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2640  if (VA.getLocInfo() == CCValAssign::Indirect)
2641  return false;
2642  if (VA.needsCustom()) {
2643  // f64 and vector types are split into multiple registers or
2644  // register/stack-slot combinations. The types will not match
2645  // the registers; give up on memory f64 refs until we figure
2646  // out what to do about this.
2647  if (!VA.isRegLoc())
2648  return false;
2649  if (!ArgLocs[++i].isRegLoc())
2650  return false;
2651  if (RegVT == MVT::v2f64) {
2652  if (!ArgLocs[++i].isRegLoc())
2653  return false;
2654  if (!ArgLocs[++i].isRegLoc())
2655  return false;
2656  }
2657  } else if (!VA.isRegLoc()) {
2658  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2659  MFI, MRI, TII))
2660  return false;
2661  }
2662  }
2663  }
2664 
2665  const MachineRegisterInfo &MRI = MF.getRegInfo();
2666  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2667  return false;
2668  }
2669 
2670  return true;
2671 }
2672 
2673 bool
2674 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2675  MachineFunction &MF, bool isVarArg,
2676  const SmallVectorImpl<ISD::OutputArg> &Outs,
2677  LLVMContext &Context) const {
2678  SmallVector<CCValAssign, 16> RVLocs;
2679  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2680  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2681 }
2682 
2683 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2684  const SDLoc &DL, SelectionDAG &DAG) {
2685  const MachineFunction &MF = DAG.getMachineFunction();
2686  const Function &F = MF.getFunction();
2687 
2688  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
2689 
2690  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2691  // version of the "preferred return address". These offsets affect the return
2692  // instruction if this is a return from PL1 without hypervisor extensions.
2693  // IRQ/FIQ: +4 "subs pc, lr, #4"
2694  // SWI: 0 "subs pc, lr, #0"
2695  // ABORT: +4 "subs pc, lr, #4"
2696  // UNDEF: +4/+2 "subs pc, lr, #0"
2697  // UNDEF varies depending on whether the exception came from ARM or Thumb
2698  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2699 
2700  int64_t LROffset;
2701  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2702  IntKind == "ABORT")
2703  LROffset = 4;
2704  else if (IntKind == "SWI" || IntKind == "UNDEF")
2705  LROffset = 0;
2706  else
2707  report_fatal_error("Unsupported interrupt attribute. If present, value "
2708  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2709 
2710  RetOps.insert(RetOps.begin() + 1,
2711  DAG.getConstant(LROffset, DL, MVT::i32, false));
2712 
2713  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2714 }
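// Editor's note: a handler such as
//   __attribute__((interrupt("IRQ"))) void isr(void);
// therefore returns with "subs pc, lr, #4", which also restores CPSR from
// SPSR_irq, rather than with a plain "bx lr".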
2715 
2716 SDValue
2717 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2718  bool isVarArg,
2719  const SmallVectorImpl<ISD::OutputArg> &Outs,
2720  const SmallVectorImpl<SDValue> &OutVals,
2721  const SDLoc &dl, SelectionDAG &DAG) const {
2722  // CCValAssign - represent the assignment of the return value to a location.
2723  SmallVector<CCValAssign, 16> RVLocs;
2724 
2725  // CCState - Info about the registers and stack slots.
2726  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2727  *DAG.getContext());
2728 
2729  // Analyze outgoing return values.
2730  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2731 
2732  SDValue Flag;
2733  SmallVector<SDValue, 4> RetOps;
2734  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2735  bool isLittleEndian = Subtarget->isLittle();
2736 
2737  MachineFunction &MF = DAG.getMachineFunction();
2738  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2739  AFI->setReturnRegsCount(RVLocs.size());
2740 
2741  // Copy the result values into the output registers.
2742  for (unsigned i = 0, realRVLocIdx = 0;
2743  i != RVLocs.size();
2744  ++i, ++realRVLocIdx) {
2745  CCValAssign &VA = RVLocs[i];
2746  assert(VA.isRegLoc() && "Can only return in registers!");
2747 
2748  SDValue Arg = OutVals[realRVLocIdx];
2749  bool ReturnF16 = false;
2750 
2751  if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
2752  // Half-precision return values can be returned like this:
2753  //
2754  // t11: f16 = fadd ...
2755  // t12: i16 = bitcast t11
2756  // t13: i32 = zero_extend t12
2757  // t14: f32 = bitcast t13 <~~~~~~~ Arg
2758  //
2759  // to avoid code generation for bitcasts, we simply set Arg to the node
2760  // that produces the f16 value, t11 in this case.
2761  //
2762  if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
2763  SDValue ZE = Arg.getOperand(0);
2764  if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
2765  SDValue BC = ZE.getOperand(0);
2766  if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
2767  Arg = BC.getOperand(0);
2768  ReturnF16 = true;
2769  }
2770  }
2771  }
2772  }
2773 
2774  switch (VA.getLocInfo()) {
2775  default: llvm_unreachable("Unknown loc info!");
2776  case CCValAssign::Full: break;
2777  case CCValAssign::BCvt:
2778  if (!ReturnF16)
2779  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2780  break;
2781  }
2782 
2783  if (VA.needsCustom()) {
2784  if (VA.getLocVT() == MVT::v2f64) {
2785  // Extract the first half and return it in two registers.
2786  SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2787  DAG.getConstant(0, dl, MVT::i32));
2788  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2789  DAG.getVTList(MVT::i32, MVT::i32), Half);
2790 
2791  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2792  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2793  Flag);
2794  Flag = Chain.getValue(1);
2795  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2796  VA = RVLocs[++i]; // skip ahead to next loc
2797  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2798  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2799  Flag);
2800  Flag = Chain.getValue(1);
2801  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2802  VA = RVLocs[++i]; // skip ahead to next loc
2803 
2804  // Extract the 2nd half and fall through to handle it as an f64 value.
2805  Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2806  DAG.getConstant(1, dl, MVT::i32));
2807  }
2808  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2809  // available.
2810  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2811  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2812  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2813  fmrrd.getValue(isLittleEndian ? 0 : 1),
2814  Flag);
2815  Flag = Chain.getValue(1);
2816  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2817  VA = RVLocs[++i]; // skip ahead to next loc
2818  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2819  fmrrd.getValue(isLittleEndian ? 1 : 0),
2820  Flag);
2821  } else
2822  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2823 
2824  // Guarantee that all emitted copies are
2825  // stuck together, avoiding something bad.
2826  Flag = Chain.getValue(1);
2827  RetOps.push_back(DAG.getRegister(VA.getLocReg(),
2828  ReturnF16 ? MVT::f16 : VA.getLocVT()));
2829  }
2830  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2831  const MCPhysReg *I =
2832  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2833  if (I) {
2834  for (; *I; ++I) {
2835  if (ARM::GPRRegClass.contains(*I))
2836  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2837  else if (ARM::DPRRegClass.contains(*I))
2838  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2839  else
2840  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2841  }
2842  }
2843 
2844  // Update chain and glue.
2845  RetOps[0] = Chain;
2846  if (Flag.getNode())
2847  RetOps.push_back(Flag);
2848 
2849  // CPUs which aren't M-class use a special sequence to return from
2850  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2851  // though we use "subs pc, lr, #N").
2852  //
2853  // M-class CPUs actually use a normal return sequence with a special
2854  // (hardware-provided) value in LR, so the normal code path works.
2855  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
2856  !Subtarget->isMClass()) {
2857  if (Subtarget->isThumb1Only())
2858  report_fatal_error("interrupt attribute is not supported in Thumb1");
2859  return LowerInterruptReturn(RetOps, dl, DAG);
2860  }
2861 
2862  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2863 }
2864 
2865 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2866  if (N->getNumValues() != 1)
2867  return false;
2868  if (!N->hasNUsesOfValue(1, 0))
2869  return false;
2870 
2871  SDValue TCChain = Chain;
2872  SDNode *Copy = *N->use_begin();
2873  if (Copy->getOpcode() == ISD::CopyToReg) {
2874  // If the copy has a glue operand, we conservatively assume it isn't safe to
2875  // perform a tail call.
2876  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2877  return false;
2878  TCChain = Copy->getOperand(0);
2879  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2880  SDNode *VMov = Copy;
2881  // f64 returned in a pair of GPRs.
2882  SmallPtrSet<SDNode*, 2> Copies;
2883  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2884  UI != UE; ++UI) {
2885  if (UI->getOpcode() != ISD::CopyToReg)
2886  return false;
2887  Copies.insert(*UI);
2888  }
2889  if (Copies.size() > 2)
2890  return false;
2891 
2892  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2893  UI != UE; ++UI) {
2894  SDValue UseChain = UI->getOperand(0);
2895  if (Copies.count(UseChain.getNode()))
2896  // Second CopyToReg
2897  Copy = *UI;
2898  else {
2899  // We are at the top of this chain.
2900  // If the copy has a glue operand, we conservatively assume it
2901  // isn't safe to perform a tail call.
2902  if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2903  return false;
2904  // First CopyToReg
2905  TCChain = UseChain;
2906  }
2907  }
2908  } else if (Copy->getOpcode() == ISD::BITCAST) {
2909  // f32 returned in a single GPR.
2910  if (!Copy->hasOneUse())
2911  return false;
2912  Copy = *Copy->use_begin();
2913  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2914  return false;
2915  // If the copy has a glue operand, we conservatively assume it isn't safe to
2916  // perform a tail call.
2917  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2918  return false;
2919  TCChain = Copy->getOperand(0);
2920  } else {
2921  return false;
2922  }
2923 
2924  bool HasRet = false;
2925  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2926  UI != UE; ++UI) {
2927  if (UI->getOpcode() != ARMISD::RET_FLAG &&
2928  UI->getOpcode() != ARMISD::INTRET_FLAG)
2929  return false;
2930  HasRet = true;
2931  }
2932 
2933  if (!HasRet)
2934  return false;
2935 
2936  Chain = TCChain;
2937  return true;
2938 }
2939 
2940 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2941  if (!Subtarget->supportsTailCall())
2942  return false;
2943 
2944  auto Attr =
2945  CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2946  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2947  return false;
2948 
2949  return true;
2950 }
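// Editor's sketch: a call is only a candidate here when the IR marks it as a
// tail call and the caller has not opted out, e.g.
//   define void @caller() {
//   entry:
//     tail call void @callee()
//     ret void
//   }
// with no "disable-tail-calls"="true" attribute on @caller.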
2951 
2952 // Trying to write a 64 bit value, so we need to split it into two 32 bit
2953 // values first, and pass the low and high parts through.
2954 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2955  SDLoc DL(Op);
2956  SDValue WriteValue = Op->getOperand(2);
2957 
2958  // This function is only supposed to be called for i64 type argument.
2959  assert(WriteValue.getValueType() == MVT::i64
2960  && "LowerWRITE_REGISTER called for non-i64 type argument.");
2961 
2962  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2963  DAG.getConstant(0, DL, MVT::i32));
2964  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2965  DAG.getConstant(1, DL, MVT::i32));
2966  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2967  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2968 }
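// Editor's sketch: an i64 intrinsic call such as
//   call void @llvm.write_register.i64(metadata !0, i64 %v)
// is rebuilt here as a WRITE_REGISTER node whose value operands are the two
// extracted i32 halves (low word, high word) of %v.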
2969 
2970 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2971 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2972 // one of the above mentioned nodes. It has to be wrapped because otherwise
2973 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2974 // be used to form addressing mode. These wrapped nodes will be selected
2975 // into MOVi.
2976 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
2977  SelectionDAG &DAG) const {
2978  EVT PtrVT = Op.getValueType();
2979  // FIXME there is no actual debug info here
2980  SDLoc dl(Op);
2981  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2982  SDValue Res;
2983 
2984  // When generating execute-only code Constant Pools must be promoted to the
2985  // global data section. It's a bit ugly that we can't share them across basic
2986  // blocks, but this way we guarantee that execute-only behaves correctly with
2987  // position-independent addressing modes.
2988  if (Subtarget->genExecuteOnly()) {
2989  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2990  auto T = const_cast<Type*>(CP->getType());
2991  auto C = const_cast<Constant*>(CP->getConstVal());
2992  auto M = const_cast<Module*>(DAG.getMachineFunction().
2993  getFunction().getParent());
2994  auto GV = new GlobalVariable(
2995  *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
2996  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
2997  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
2998  Twine(AFI->createPICLabelUId())
2999  );
3000  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3001  dl, PtrVT);
3002  return LowerGlobalAddress(GA, DAG);
3003  }
3004 
3005  if (CP->isMachineConstantPoolEntry())
3006  Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
3007  CP->getAlignment());
3008  else
3009  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
3010  CP->getAlignment());
3011  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3012 }
3013 
3014 unsigned ARMTargetLowering::getJumpTableEncoding() const {
3015  return MachineJumpTableInfo::EK_Inline;
3016 }
3017 
3018 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3019  SelectionDAG &DAG) const {
3020  MachineFunction &MF = DAG.getMachineFunction();
3021  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3022  unsigned ARMPCLabelIndex = 0;
3023  SDLoc DL(Op);
3024  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3025  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3026  SDValue CPAddr;
3027  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3028  if (!IsPositionIndependent) {
3029  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
3030  } else {
3031  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3032  ARMPCLabelIndex = AFI->createPICLabelUId();
3033  ARMConstantPoolValue *CPV =
3034  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3035  ARMCP::CPBlockAddress, PCAdj);
3036  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3037  }
3038  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3039  SDValue Result = DAG.getLoad(
3040  PtrVT, DL, DAG.getEntryNode(), CPAddr,
3041  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3042  if (!IsPositionIndependent)
3043  return Result;
3044  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3045  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3046 }
3047 
3048 /// Convert a TLS address reference into the correct sequence of loads
3049 /// and calls to compute the variable's address for Darwin, and return an
3050 /// SDValue containing the final node.
3051 
3052 /// Darwin only has one TLS scheme which must be capable of dealing with the
3053 /// fully general situation, in the worst case. This means:
3054 /// + "extern __thread" declaration.
3055 /// + Defined in a possibly unknown dynamic library.
3056 ///
3057 /// The general system is that each __thread variable has a [3 x i32] descriptor
3058 /// which contains information used by the runtime to calculate the address. The
3059 /// only part of this the compiler needs to know about is the first word, which
3060 /// contains a function pointer that must be called with the address of the
3061 /// entire descriptor in "r0".
3062 ///
3063 /// Since this descriptor may be in a different unit, in general access must
3064 /// proceed along the usual ARM rules. A common sequence to produce is:
3065 ///
3066 /// movw rT1, :lower16:_var$non_lazy_ptr
3067 /// movt rT1, :upper16:_var$non_lazy_ptr
3068 /// ldr r0, [rT1]
3069 /// ldr rT2, [r0]
3070 /// blx rT2
3071 /// [...address now in r0...]
3072 SDValue
3073 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3074  SelectionDAG &DAG) const {
3075  assert(Subtarget->isTargetDarwin() &&
3076  "This function expects a Darwin target");
3077  SDLoc DL(Op);
3078 
3079  // First step is to get the address of the actual global symbol. This is where
3080  // the TLS descriptor lives.
3081  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3082 
3083  // The first entry in the descriptor is a function pointer that we must call
3084  // to obtain the address of the variable.
3085  SDValue Chain = DAG.getEntryNode();
3086  SDValue FuncTLVGet = DAG.getLoad(
3087  MVT::i32, DL, Chain, DescAddr,
3088  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
3089  /* Alignment = */ 4,
3090  MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
3091  MachineMemOperand::MOInvariant);
3092  Chain = FuncTLVGet.getValue(1);
3093 
3094  MachineFunction &F = DAG.getMachineFunction();
3095  MachineFrameInfo &MFI = F.getFrameInfo();
3096  MFI.setAdjustsStack(true);
3097 
3098  // TLS calls preserve all registers except those that absolutely must be
3099  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3100  // silly).
3101  auto TRI =
3102  getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3103  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3104  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3105 
3106  // Finally, we can make the call. This is just a degenerate version of a
3107  // normal ARM call node: r0 takes the address of the descriptor, and
3108  // returns the address of the variable in this thread.
3109  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3110  Chain =
3111  DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3112  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3113  DAG.getRegisterMask(Mask), Chain.getValue(1));
3114  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3115 }
3116 
3117 SDValue
3118 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3119  SelectionDAG &DAG) const {
3120  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3121 
3122  SDValue Chain = DAG.getEntryNode();
3123  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3124  SDLoc DL(Op);
3125 
3126  // Load the current TEB (thread environment block)
3127  SDValue Ops[] = {Chain,
3128  DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3129  DAG.getTargetConstant(15, DL, MVT::i32),
3130  DAG.getTargetConstant(0, DL, MVT::i32),
3131  DAG.getTargetConstant(13, DL, MVT::i32),
3132  DAG.getTargetConstant(0, DL, MVT::i32),
3133  DAG.getTargetConstant(2, DL, MVT::i32)};
3134  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3135  DAG.getVTList(MVT::i32, MVT::Other), Ops);
3136 
3137  SDValue TEB = CurrentTEB.getValue(0);
3138  Chain = CurrentTEB.getValue(1);
3139 
3140  // Load the ThreadLocalStoragePointer from the TEB
3141  // A pointer to the TLS array is located at offset 0x2c from the TEB.
3142  SDValue TLSArray =
3143  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3144  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3145 
3146  // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3147  // offset into the TLSArray.
3148 
3149  // Load the TLS index from the C runtime
3150  SDValue TLSIndex =
3151  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3152  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3153  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3154 
3155  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3156  DAG.getConstant(2, DL, MVT::i32));
3157  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3158  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3159  MachinePointerInfo());
3160 
3161  // Get the offset of the start of the .tls section (section base)
3162  const auto *GA = cast<GlobalAddressSDNode>(Op);
3163  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3164  SDValue Offset = DAG.getLoad(
3165  PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3166  DAG.getTargetConstantPool(CPV, PtrVT, 4)),
3167  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3168 
3169  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3170 }
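// Editor's note: putting the pieces together, the Windows TLS address is
//   TEB (read via CP15 c13) + 0x2c   -> ThreadLocalStoragePointer (TLS array)
//   TLSArray[4 * _tls_index]         -> this module's TLS block
//   block + SECREL(variable)         -> address of the variable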
3171 
3172 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
3173 SDValue
3174 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3175  SelectionDAG &DAG) const {
3176  SDLoc dl(GA);
3177  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3178  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3179  MachineFunction &MF = DAG.getMachineFunction();
3180  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3181  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3182  ARMConstantPoolValue *CPV =
3183  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3184  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3185  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3186  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3187  Argument = DAG.getLoad(
3188  PtrVT, dl, DAG.getEntryNode(), Argument,
3189  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3190  SDValue Chain = Argument.getValue(1);
3191 
3192  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3193  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3194 
3195  // call __tls_get_addr.
3196  ArgListTy Args;
3197  ArgListEntry Entry;
3198  Entry.Node = Argument;
3199  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3200  Args.push_back(Entry);
3201 
3202  // FIXME: is there useful debug info available here?
3203  TargetLowering::CallLoweringInfo CLI(DAG);
3204  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3205  CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3206  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3207 
3208  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3209  return CallResult.first;
3210 }
3211 
3212 // Lower ISD::GlobalTLSAddress using the "initial exec" or
3213 // "local exec" model.
3214 SDValue
3215 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3216  SelectionDAG &DAG,
3217  TLSModel::Model model) const {
3218  const GlobalValue *GV = GA->getGlobal();
3219  SDLoc dl(GA);
3220  SDValue Offset;
3221  SDValue Chain = DAG.getEntryNode();
3222  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3223  // Get the Thread Pointer
3224  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3225 
3226  if (model == TLSModel::InitialExec) {
3227  MachineFunction &MF = DAG.getMachineFunction();
3228  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3229  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3230  // Initial exec model.
3231  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3232  ARMConstantPoolValue *CPV =
3233  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3234  ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3235  true);
3236  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3237  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3238  Offset = DAG.getLoad(
3239  PtrVT, dl, Chain, Offset,
3240  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3241  Chain = Offset.getValue(1);
3242 
3243  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3244  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3245 
3246  Offset = DAG.getLoad(
3247  PtrVT, dl, Chain, Offset,
3248  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3249  } else {
3250  // local exec model
3251  assert(model == TLSModel::LocalExec);
3252  ARMConstantPoolValue *CPV =
3253  ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3254  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3255  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3256  Offset = DAG.getLoad(
3257  PtrVT, dl, Chain, Offset,
3258  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3259  }
3260 
3261  // The address of the thread local variable is the add of the thread
3262  // pointer with the offset of the variable.
3263  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3264 }
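// Editor's sketch: in the local exec model this boils down to something like
//   ldr  r1, <cpool: var(TPOFF)>       @ offset of var within the TLS block
//   mrc  p15, 0, r0, c13, c0, 3        @ ARMISD::THREAD_POINTER (TPIDRURO)
//   add  r0, r0, r1
// while initial exec adds one extra load of the GOT-indirected offset.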
3265 
3266 SDValue
3267 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3268  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3269  if (DAG.getTarget().useEmulatedTLS())
3270  return LowerToTLSEmulatedModel(GA, DAG);
3271 
3272  if (Subtarget->isTargetDarwin())
3273  return LowerGlobalTLSAddressDarwin(Op, DAG);
3274 
3275  if (Subtarget->isTargetWindows())
3276  return LowerGlobalTLSAddressWindows(Op, DAG);
3277 
3278  // TODO: implement the "local dynamic" model
3279  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3280  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3281 
3282  switch (model) {
3283  case TLSModel::GeneralDynamic:
3284  case TLSModel::LocalDynamic:
3285  return LowerToTLSGeneralDynamicModel(GA, DAG);
3286  case TLSModel::InitialExec:
3287  case TLSModel::LocalExec:
3288  return LowerToTLSExecModels(GA, DAG, model);
3289  }
3290  llvm_unreachable("bogus TLS model");
3291 }
3292 
3293 /// Return true if all users of V are within function F, looking through
3294 /// ConstantExprs.
3295 static bool allUsersAreInFunction(const Value *V, const Function *F) {
3296  SmallVector<const User*,4> Worklist;
3297  for (auto *U : V->users())
3298  Worklist.push_back(U);
3299  while (!Worklist.empty()) {
3300  auto *U = Worklist.pop_back_val();
3301  if (isa<ConstantExpr>(U)) {
3302  for (auto *UU : U->users())
3303  Worklist.push_back(UU);
3304  continue;
3305  }
3306 
3307  auto *I = dyn_cast<Instruction>(U);
3308  if (!I || I->getParent()->getParent() != F)
3309  return false;
3310  }
3311  return true;
3312 }
3313 
3314 static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3315  const GlobalValue *GV, SelectionDAG &DAG,
3316  EVT PtrVT, const SDLoc &dl) {
3317  // If we're creating a pool entry for a constant global with unnamed address,
3318  // and the global is small enough, we can emit it inline into the constant pool
3319  // to save ourselves an indirection.
3320  //
3321  // This is a win if the constant is only used in one function (so it doesn't
3322  // need to be duplicated) or duplicating the constant wouldn't increase code
3323  // size (implying the constant is no larger than 4 bytes).
3324  const Function &F = DAG.getMachineFunction().getFunction();
3325 
3326  // We rely on this decision to inline being idempotent and unrelated to the
3327  // use-site. We know that if we inline a variable at one use site, we'll
3328  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3329  // doesn't know about this optimization, so bail out if it's enabled, else
3330  // we could decide to inline here (and thus never emit the GV) while
3331  // fast-isel generated code still requires the GV.
3332  if (!EnableConstpoolPromotion ||
3333  DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3334  return SDValue();
3335 
3336  auto *GVar = dyn_cast<GlobalVariable>(GV);
3337  if (!GVar || !GVar->hasInitializer() ||
3338  !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3339  !GVar->hasLocalLinkage())
3340  return SDValue();
3341 
3342  // If we inline a value that contains relocations, we move the relocations
3343  // from .data to .text. This is not allowed in position-independent code.
3344  auto *Init = GVar->getInitializer();
3345  if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3346  Init->needsRelocation())
3347  return SDValue();
3348 
3349  // The constant islands pass can only really deal with alignment requests
3350  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3351  // any type requiring alignment greater than 4 bytes. We also
3352  // can only promote constants that are multiples of 4 bytes in size or
3353  // are paddable to a multiple of 4. Currently we only try to pad constants
3354  // that are strings, for simplicity.
3355  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3356  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3357  unsigned Align = DAG.getDataLayout().getPreferredAlignment(GVar);
3358  unsigned RequiredPadding = 4 - (Size % 4);
3359  bool PaddingPossible =
3360  RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3361  if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3362  Size == 0)
3363  return SDValue();
3364 
3365  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3366  MachineFunction &MF = DAG.getMachineFunction();
3367  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3368 
3369  // We can't bloat the constant pool too much, else the ConstantIslands pass
3370  // may fail to converge. If we haven't promoted this global yet (it may have
3371  // multiple uses), and promoting it would increase the constant pool size (Sz
3372  // > 4), ensure we have space to do so up to MaxTotal.
3373  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3374  if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3375  ConstpoolPromotionMaxTotal)
3376  return SDValue();
3377 
3378  // This is only valid if all users are in a single function; we can't clone
3379  // the constant in general. The LLVM IR unnamed_addr allows merging
3380  // constants, but not cloning them.
3381  //
3382  // We could potentially allow cloning if we could prove all uses of the
3383  // constant in the current function don't care about the address, like
3384  // printf format strings. But that isn't implemented for now.
3385  if (!allUsersAreInFunction(GVar, &F))
3386  return SDValue();
3387 
3388  // We're going to inline this global. Pad it out if needed.
3389  if (RequiredPadding != 4) {
3390  StringRef S = CDAInit->getAsString();
3391 
3392  SmallVector<uint8_t,16> V(S.size());
3393  std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3394  while (RequiredPadding--)
3395  V.push_back(0);
3396  Init = ConstantDataArray::get(*DAG.getContext(), V);
3397  }
3398 
3399  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3400  SDValue CPAddr =
3401  DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3402  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3403  AFI->markGlobalAsPromotedToConstantPool(GVar);
3404  AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3405  PaddedSize - 4);
3406  }
3407  ++NumConstpoolPromoted;
3408  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3409 }
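// Worked example of the promotion above, for a hypothetical global declared
// as static const char Str[] = "hello": Size = 6 (including the NUL), so
// RequiredPadding = 4 - (6 % 4) = 2 and PaddedSize = 8. The initializer is
// rebuilt as the 8-byte array {'h','e','l','l','o',0,0,0}, emitted directly
// into the constant pool, and every use of Str becomes a reference to that
// constant-pool entry, saving the usual load-address-then-load indirection.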
3410 
3411 bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
3412  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3413  if (!(GV = GA->getBaseObject()))
3414  return false;
3415  if (const auto *V = dyn_cast<GlobalVariable>(GV))
3416  return V->isConstant();
3417  return isa<Function>(GV);
3418 }
3419 
3420 SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3421  SelectionDAG &DAG) const {
3422  switch (Subtarget->getTargetTriple().getObjectFormat()) {
3423  default: llvm_unreachable("unknown object format");
3424  case Triple::COFF:
3425  return LowerGlobalAddressWindows(Op, DAG);
3426  case Triple::ELF:
3427  return LowerGlobalAddressELF(Op, DAG);
3428  case Triple::MachO:
3429  return LowerGlobalAddressDarwin(Op, DAG);
3430  }
3431 }
3432 
3433 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3434  SelectionDAG &DAG) const {
3435  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3436  SDLoc dl(Op);
3437  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3438  const TargetMachine &TM = getTargetMachine();
3439  bool IsRO = isReadOnly(GV);
3440 
3441  // promoteToConstantPool only if not generating XO text section
3442  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3443  if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3444  return V;
3445 
3446  if (isPositionIndependent()) {
3447  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3448  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3449  UseGOT_PREL ? ARMII::MO_GOT : 0);
3450  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3451  if (UseGOT_PREL)
3452  Result =
3453  DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3454  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3455  return Result;
3456  } else if (Subtarget->isROPI() && IsRO) {
3457  // PC-relative.
3458  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3459  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3460  return Result;
3461  } else if (Subtarget->isRWPI() && !IsRO) {
3462  // SB-relative.
3463  SDValue RelAddr;
3464  if (Subtarget->useMovt()) {
3465  ++NumMovwMovt;
3466  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3467  RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3468  } else { // use literal pool for address constant
3469  ARMConstantPoolValue *CPV =
3470  ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3471  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3472  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3473  RelAddr = DAG.getLoad(
3474  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3475  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3476  }
3477  SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3478  SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3479  return Result;
3480  }
3481 
3482  // If we have T2 ops, we can materialize the address directly via movt/movw
3483  // pair. This is always cheaper.
3484  if (Subtarget->useMovt()) {
3485  ++NumMovwMovt;
3486  // FIXME: Once remat is capable of dealing with instructions with register
3487  // operands, expand this into two nodes.
3488  return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3489  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3490  } else {
3491  SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3492  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3493  return DAG.getLoad(
3494  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3495  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3496  }
3497 }
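// Roughly, the main cases above come out as follows ("sym" is a placeholder;
// the PIC form may build the PC-relative GOT-slot address with movw/movt or
// with a literal pool, depending on useMovt()):
//
//   DSO-local, movw/movt available:
//         movw  r0, :lower16:sym
//         movt  r0, :upper16:sym
//
//   Position-independent, non-local symbol (UseGOT_PREL):
//         <r0 = address of sym's GOT slot, formed PC-relatively>
//         ldr   r0, [r0]           @ the extra GOT load added above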
3498 
3499 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3500  SelectionDAG &DAG) const {
3501  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3502  "ROPI/RWPI not currently supported for Darwin");
3503  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3504  SDLoc dl(Op);
3505  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3506 
3507  if (Subtarget->useMovt())
3508  ++NumMovwMovt;
3509 
3510  // FIXME: Once remat is capable of dealing with instructions with register
3511  // operands, expand this into multiple nodes
3512  unsigned Wrapper =
3513  isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3514 
3515  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3516  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3517 
3518  if (Subtarget->isGVIndirectSymbol(GV))
3519  Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3520  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3521  return Result;
3522 }
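// Illustrative sketch, assuming a PIC Darwin target and a symbol that needs
// indirection (isGVIndirectSymbol): the movw/movt pair materializes the
// PC-relative address of a non-lazy pointer and the load above dereferences
// it. Something along the lines of:
//
//         movw  r0, :lower16:(L_sym$non_lazy_ptr - (.Lpc + 8))
//         movt  r0, :upper16:(L_sym$non_lazy_ptr - (.Lpc + 8))
//   .Lpc: add   r0, pc, r0
//         ldr   r0, [r0]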
3523 
3524 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3525  SelectionDAG &DAG) const {
3526  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3527  assert(Subtarget->useMovt() &&
3528  "Windows on ARM expects to use movw/movt");
3529  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3530  "ROPI/RWPI not currently supported for Windows");
3531 
3532  const TargetMachine &TM = getTargetMachine();
3533  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3534  ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
3535  if (GV->hasDLLImportStorageClass())
3536  TargetFlags = ARMII::MO_DLLIMPORT;
3537  else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
3538  TargetFlags = ARMII::MO_COFFSTUB;
3539  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3540  SDValue Result;
3541  SDLoc DL(Op);
3542 
3543  ++NumMovwMovt;
3544 
3545  // FIXME: Once remat is capable of dealing with instructions with register
3546  // operands, expand this into two nodes.
3547  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3548  DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
3549  TargetFlags));
3550  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
3551  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3552  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3553  return Result;
3554 }
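// Illustrative sketch: a dllimport'ed global is reached through the import
// address table, so the wrapper materializes the address of the __imp_
// pointer and the load above fetches the real address ("sym" is a
// placeholder):
//
//         movw  r0, :lower16:__imp_sym
//         movt  r0, :upper16:__imp_sym
//         ldr   r0, [r0]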
3555 
3556 SDValue
3557 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3558  SDLoc dl(Op);
3559  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3560  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3561  DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3562  Op.getOperand(1), Val);
3563 }
3564 
3565 SDValue
3566 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3567  SDLoc dl(Op);
3568  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3569  Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3570 }
3571 
3572 SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3573  SelectionDAG &DAG) const {
3574  SDLoc dl(Op);
3575  return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3576  Op.getOperand(0));
3577 }
3578 
3579 SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
3580  SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
3581  unsigned IntNo =
3582  cast<ConstantSDNode>(
3583  Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other))
3584  ->getZExtValue();
3585  switch (IntNo) {
3586  default:
3587  return SDValue(); // Don't custom lower most intrinsics.
3588  case Intrinsic::arm_gnu_eabi_mcount: {
3589  MachineFunction &MF = DAG.getMachineFunction();
3590  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3591  SDLoc dl(Op);
3592  SDValue Chain = Op.getOperand(0);
3593  // call "\01__gnu_mcount_nc"
3594  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
3595  const uint32_t *Mask =
3596  ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3597  assert(Mask && "Missing call preserved mask for calling convention");
3598  // Mark LR as an implicit live-in.
3599  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
3600  SDValue ReturnAddress =
3601  DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
3602  std::vector<EVT> ResultTys = {MVT::Other, MVT::Glue};
3603  SDValue Callee =
3604  DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
3605  SDValue RegisterMask = DAG.getRegisterMask(Mask);
3606  if (Subtarget->isThumb())
3607  return SDValue(
3608  DAG.getMachineNode(
3609  ARM::tBL_PUSHLR, dl, ResultTys,
3610  {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
3611  DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
3612  0);
3613  return SDValue(
3614  DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
3615  {ReturnAddress, Callee, RegisterMask, Chain}),
3616  0);
3617  }
3618  }
3619 }
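// The "\01__gnu_mcount_nc" ABI expects the caller's lr to be pushed on the
// stack (the callee pops it), which is why the BL_PUSHLR / tBL_PUSHLR pseudos
// are used rather than an ordinary call. Roughly (illustrative):
//
//         push  {lr}
//         bl    __gnu_mcount_nc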
3620 
3621 SDValue
3622 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3623  const ARMSubtarget *Subtarget) const {
3624  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3625  SDLoc dl(Op);
3626  switch (IntNo) {
3627  default: return SDValue(); // Don't custom lower most intrinsics.
3628  case Intrinsic::thread_pointer: {
3629  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3630  return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3631  }
3632  case Intrinsic::eh_sjlj_lsda: {
3633  MachineFunction &MF = DAG.getMachineFunction();
3634  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3635  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3636  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3637  SDValue CPAddr;
3638  bool IsPositionIndependent = isPositionIndependent();
3639  unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3640  ARMConstantPoolValue *CPV =
3641  ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
3642  ARMCP::CPLSDA, PCAdj);
3643  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3644  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3645  SDValue Result = DAG.getLoad(
3646  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3647  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3648 
3649  if (IsPositionIndependent) {
3650  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3651  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3652  }
3653  return Result;
3654  }
3655  case Intrinsic::arm_neon_vabs:
3656  return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3657  Op.getOperand(1));
3658  case Intrinsic::arm_neon_vmulls:
3659  case Intrinsic::arm_neon_vmullu: {
3660  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3661  ? ARMISD::VMULLs : ARMISD::VMULLu;
3662  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3663  Op.getOperand(1), Op.getOperand(2));
3664  }
3665  case Intrinsic::arm_neon_vminnm:
3666  case Intrinsic::arm_neon_vmaxnm: {
3667  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3668  ? ISD::FMINNUM : ISD::FMAXNUM;
3669  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3670  Op.getOperand(1), Op.getOperand(2));
3671  }
3672  case Intrinsic::arm_neon_vminu:
3673  case Intrinsic::arm_neon_vmaxu: {
3674  if (Op.getValueType().isFloatingPoint())
3675  return SDValue();
3676  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3677  ? ISD::UMIN : ISD::UMAX;
3678  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3679  Op.getOperand(1), Op.getOperand(2));
3680  }
3681  case Intrinsic::arm_neon_vmins:
3682  case Intrinsic::arm_neon_vmaxs: {
3683  // v{min,max}s is overloaded between signed integers and floats.
3684  if (!Op.getValueType().isFloatingPoint()) {
3685  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3686  ? ISD::SMIN : ISD::SMAX;
3687  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3688  Op.getOperand(1), Op.getOperand(2));
3689  }
3690  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3691  ? ISD::FMINIMUM : ISD::FMAXIMUM;
3692  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3693  Op.getOperand(1), Op.getOperand(2));
3694  }
3695  case Intrinsic::arm_neon_vtbl1:
3696  return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3697  Op.getOperand(1), Op.getOperand(2));
3698  case Intrinsic::arm_neon_vtbl2:
3699  return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3700  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3701  }
3702 }
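// For example (illustrative), with the mappings above a call to
// llvm.arm.neon.vmulls.v4i32 on two <4 x i16> operands becomes an
// ARMISD::VMULLs node and ultimately "vmull.s16 q0, d0, d1", while
// llvm.arm.neon.vminu on integer vectors goes through ISD::UMIN and selects
// to the corresponding "vmin.uN" instruction.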
3703 
3704 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3705  const ARMSubtarget *Subtarget) {
3706  SDLoc dl(Op);
3707  ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
3708  auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
3709  if (SSID == SyncScope::SingleThread)
3710  return Op;
3711 
3712  if (!Subtarget->hasDataBarrier()) {
3713  // Some ARMv6 cpus can support data barriers with an mcr instruction.
3714  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3715  // here.
3716  assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3717  "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3718  return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3719  DAG.getConstant(0, dl, MVT::i32));
3720  }
3721 
3722  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3723  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3724  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3725  if (Subtarget->isMClass()) {
3726  // Only a full system barrier exists in the M-class architectures.
3727  Domain = ARM_MB::SY;
3728  } else if (Subtarget->preferISHSTBarriers() &&
3729  Ord == AtomicOrdering::Release) {
3730  // Swift happens to implement ISHST barriers in a way that's compatible with
3731  // Release semantics but weaker than ISH so we'd be fools not to use
3732  // it. Beware: other processors probably don't!
3733  Domain = ARM_MB::ISHST;
3734  }
3735 
3736  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3737  DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3738  DAG.getConstant(Domain, dl, MVT::i32));
3739 }
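// Illustrative mapping of the choices above: on A/R-class cores a fence
// normally becomes "dmb ish", a release-only fence on Swift becomes
// "dmb ishst", and on M-class (where only the full-system domain exists) it
// becomes "dmb sy". Single-thread fences were returned unchanged earlier,
// since no hardware barrier is needed for a single-threaded sync scope.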
3740 
3741 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3742  const ARMSubtarget *Subtarget) {
3743  // ARM pre-v5TE and Thumb1 do not have preload instructions.
3744  if (!(Subtarget->isThumb2() ||
3745  (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3746  // Just preserve the chain.
3747  return Op.getOperand(0);
3748 
3749  SDLoc dl(Op);
3750  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3751  if (!isRead &&
3752  (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3753  // ARMv7 with MP extension has PLDW.
3754  return Op.getOperand(0);
3755 
3756  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3757  if (Subtarget->isThumb()) {
3758  // Invert the bits.
3759  isRead = ~isRead & 1;
3760  isData = ~isData & 1;
3761  }
3762 
3763  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3764  Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3765  DAG.getConstant(isData, dl, MVT::i32));
3766 }
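// Illustrative mapping: a read data prefetch (llvm.prefetch with rw = 0,
// cache type = 1) becomes "pld [addr]", a write prefetch (rw = 1) becomes
// "pldw [addr]" when the MP extension is present, and an instruction
// prefetch (cache type = 0) becomes "pli [addr]". The bit flip above for
// Thumb matches the polarity the Thumb-2 selection patterns expect.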
3767 
3768 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3769  MachineFunction &MF = DAG.getMachineFunction();
3770  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3771 
3772  // vastart just stores the address of the VarArgsFrameIndex slot into the
3773  // memory location argument.
3774  SDLoc dl(Op);
3775  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3776  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3777  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3778  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3779  MachinePointerInfo(SV));
3780 }
3781 
3782 SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3783  CCValAssign &NextVA,
3784  SDValue &Root,
3785  SelectionDAG &DAG,
3786  const SDLoc &dl) const {
3787  MachineFunction &MF = DAG.getMachineFunction();
3788  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3789 
3790  const TargetRegisterClass *RC;
3791  if (AFI->isThumb1OnlyFunction())
3792  RC = &ARM::tGPRRegClass;
3793  else
3794  RC = &ARM::GPRRegClass;
3795 
3796  // Transform the arguments stored in physical registers into virtual ones.
3797  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3798  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3799 
3800  SDValue ArgValue2;
3801  if (NextVA.isMemLoc()) {
3802  MachineFrameInfo &MFI = MF.getFrameInfo();
3803  int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3804 
3805  // Create load node to retrieve arguments from the stack.
3806  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3807  ArgValue2 = DAG.getLoad(
3808  MVT::i32, dl, Root, FIN,
3809  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3810  } else {
3811  Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3812  ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3813  }
3814  if (!Subtarget->isLittle())
3815  std::swap (ArgValue, ArgValue2);
3816  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3817 }
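// Sketch (illustrative): for an AAPCS f64 argument whose halves arrive in two
// GPRs (or one GPR plus a stack slot), the two i32 values gathered above are
// recombined with a single "vmov dN, rLo, rHi" (ARMISD::VMOVDRR), with the
// halves swapped first on big-endian targets.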
3818 
3819 // The remaining GPRs hold either the beginning of variable-argument
3820 // data, or the beginning of an aggregate passed by value (usually
3821 // byval). Either way, we allocate stack slots adjacent to the data
3822 // provided by our caller, and store the unallocated registers there.
3823 // If this is a variadic function, the va_list pointer will begin with
3824 // these values; otherwise, this reassembles a (byval) structure that
3825 // was split between registers and memory.
3826 // Return: The frame index the registers were stored into.
3827 int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3828  const SDLoc &dl, SDValue &Chain,
3829  const Value *OrigArg,
3830  unsigned InRegsParamRecordIdx,
3831  int ArgOffset, unsigned ArgSize) const {
3832  // Currently, two use-cases are possible:
3833  // Case #1. Non-var-args function, and we meet the first byval parameter.
3834  // Set up the first unallocated register as the first byval register;
3835  // eat all remaining registers
3836  // (these two actions are performed by the HandleByVal method).
3837  // Then, here, we initialize the stack frame with
3838  // "store-reg" instructions.
3839  // Case #2. Var-args function that doesn't contain byval parameters.
3840  // The same: eat all remaining unallocated registers,
3841  // then initialize the stack frame.
3842 
3843  MachineFunction &MF = DAG.getMachineFunction();
3844  MachineFrameInfo &MFI = MF.getFrameInfo();
3845  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3846  unsigned RBegin, REnd;
3847  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3848  CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3849  } else {
3850  unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3851  RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3852  REnd = ARM::R4;
3853  }
3854 
3855  if (REnd != RBegin)
3856  ArgOffset = -4 * (ARM::R4 - RBegin);
3857 
3858  auto PtrVT = getPointerTy(DAG.getDataLayout());
3859  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3860  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3861 
3862  SmallVector<SDValue, 4> MemOps;
3863  const TargetRegisterClass *RC =
3864  AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3865 
3866  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3867  unsigned VReg = MF.addLiveIn(Reg, RC);
3868  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3869  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3870  MachinePointerInfo(OrigArg, 4 * i));
3871  MemOps.push_back(Store);
3872  FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3873  }
3874 
3875  if (!MemOps.empty())
3876  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3877  return FrameIndex;
3878 }
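// Worked example (illustrative): for a hypothetical "void f(int a, struct S s)"
// where S is 12 bytes, AAPCS assigns a to r0 and s to r1-r3. HandleByVal
// records the byval register range as r1 up to r4, so RBegin = r1, REnd = r4,
// ArgOffset becomes -4 * 3 = -12, and the loop above stores r1, r2 and r3
// into a 12-byte fixed object adjacent to the caller-provided stack area,
// giving the body of f one contiguous in-memory copy of s.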
3879 
3880 // Set up the stack frame that the va_list pointer will start from.
3881 void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3882  const SDLoc &dl, SDValue &Chain,
3883  unsigned ArgOffset,
3884  unsigned TotalArgRegsSaveSize,
3885  bool ForceMutable) const {
3886  MachineFunction &MF = DAG.getMachineFunction();
3887  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3888 
3889  // Try to store any remaining integer argument regs
3890  // to their spots on the stack so that they may be loaded by dereferencing
3891  // the result of va_next.
3892  // If there are no regs to be stored, just point the address past the last
3893  // argument passed via the stack.
3894  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3895  CCInfo.getInRegsParamsCount(),
3896  CCInfo.getNextStackOffset(),
3897  std::max(4U, TotalArgRegsSaveSize));
3898  AFI->setVarArgsFrameIndex(FrameIndex);
3899 }
3900 
3901 SDValue ARMTargetLowering::LowerFormalArguments(
3902  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3903  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3904  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3905  MachineFunction &MF = DAG.getMachineFunction();
3906  MachineFrameInfo &MFI = MF.getFrameInfo();
3907 
3908  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3909 
3910  // Assign locations to all of the incoming arguments.
3911  SmallVector<CCValAssign, 16> ArgLocs;
3912  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3913  *DAG.getContext());
3914  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3915 
3916  SmallVector<SDValue, 16> ArgValues;
3917  SDValue ArgValue;
3918  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3919  unsigned CurArgIdx = 0;
3920 
3921  // Initially ArgRegsSaveSize is zero.
3922  // Then we increase this value each time we meet a byval parameter.
3923  // We also increase this value in the case of a varargs function.
3924  AFI->setArgRegsSaveSize(0);
3925 
3926  // Calculate the amount of stack space that we need to allocate to store
3927  // byval and variadic arguments that are passed in registers.
3928  // We need to know this before we allocate the first byval or variadic
3929  // argument, as they will be allocated a stack slot below the CFA (Canonical
3930  // Frame Address, the stack pointer at entry to the function).
3931  unsigned ArgRegBegin = ARM::R4;
3932  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3933  if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3934  break;
3935 
3936  CCValAssign &VA = ArgLocs[i];
3937  unsigned Index = VA.getValNo();
3938  ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3939  if (!Flags.isByVal())
3940  continue;
3941 
3942  assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3943  unsigned RBegin, REnd;
3944  CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3945  ArgRegBegin = std::min(ArgRegBegin, RBegin);
3946 
3947  CCInfo.nextInRegsParam();
3948  }
3949  CCInfo.rewindByValRegsInfo();
3950 
3951  int lastInsIndex = -1;
3952  if (isVarArg && MFI.hasVAStart()) {
3953  unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3954  if (RegIdx != array_lengthof(GPRArgRegs))
3955  ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3956  }
3957 
3958  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3959  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3960  auto PtrVT = getPointerTy(DAG.getDataLayout());
3961 
3962  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3963  CCValAssign &VA = ArgLocs[i];
3964  if (Ins[VA.getValNo()].isOrigArg()) {
3965  std::advance(CurOrigArg,
3966  Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3967  CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3968  }
3969  // Arguments stored in registers.
3970  if (VA.isRegLoc()) {
3971  EVT RegVT = VA.getLocVT();
3972 
3973  if (VA.needsCustom()) {
3974  // f64 and vector types are split up into multiple registers or
3975  // combinations of registers and stack slots.
3976  if (VA.getLocVT() == MVT::v2f64) {
3977  SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3978  Chain, DAG, dl);
3979  VA = ArgLocs[++i]; // skip ahead to next loc
3980  SDValue ArgValue2;
3981  if (VA.isMemLoc()) {
3982  int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3983  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3984  ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3985  MachinePointerInfo::getFixedStack(
3986  DAG.getMachineFunction(), FI));
3987  } else {
3988  ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3989  Chain, DAG, dl);
3990  }
3991  ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3992  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3993  ArgValue, ArgValue1,
3994  DAG.getIntPtrConstant(0, dl));
3995  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3996  ArgValue, ArgValue2,
3997  DAG.getIntPtrConstant(1, dl));
3998  } else
3999  ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4000  } else {
4001  const TargetRegisterClass *RC;
4002 
4003 
4004  if (RegVT == MVT::f16)
4005  RC = &ARM::HPRRegClass;
4006  else if (RegVT == MVT::f32)
4007  RC = &ARM::SPRRegClass;
4008  else if (RegVT == MVT::f64 || RegVT == MVT::v4f16)