1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that ARM uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMISelLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMCallingConv.h"
18 #include "ARMConstantPoolValue.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMPerfectShuffle.h"
21 #include "ARMRegisterInfo.h"
22 #include "ARMSelectionDAGInfo.h"
23 #include "ARMSubtarget.h"
24 #include "ARMTargetTransformInfo.h"
27 #include "Utils/ARMBaseInfo.h"
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/BitVector.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallVector.h"
36 #include "llvm/ADT/Statistic.h"
37 #include "llvm/ADT/StringExtras.h"
38 #include "llvm/ADT/StringRef.h"
39 #include "llvm/ADT/StringSwitch.h"
40 #include "llvm/ADT/Triple.h"
41 #include "llvm/ADT/Twine.h"
65 #include "llvm/IR/Attributes.h"
66 #include "llvm/IR/CallingConv.h"
67 #include "llvm/IR/Constant.h"
68 #include "llvm/IR/Constants.h"
69 #include "llvm/IR/DataLayout.h"
70 #include "llvm/IR/DebugLoc.h"
71 #include "llvm/IR/DerivedTypes.h"
72 #include "llvm/IR/Function.h"
73 #include "llvm/IR/GlobalAlias.h"
74 #include "llvm/IR/GlobalValue.h"
75 #include "llvm/IR/GlobalVariable.h"
76 #include "llvm/IR/IRBuilder.h"
77 #include "llvm/IR/InlineAsm.h"
78 #include "llvm/IR/Instruction.h"
79 #include "llvm/IR/Instructions.h"
80 #include "llvm/IR/IntrinsicInst.h"
81 #include "llvm/IR/Intrinsics.h"
82 #include "llvm/IR/IntrinsicsARM.h"
83 #include "llvm/IR/Module.h"
84 #include "llvm/IR/PatternMatch.h"
85 #include "llvm/IR/Type.h"
86 #include "llvm/IR/User.h"
87 #include "llvm/IR/Value.h"
88 #include "llvm/MC/MCInstrDesc.h"
90 #include "llvm/MC/MCRegisterInfo.h"
91 #include "llvm/MC/MCSchedule.h"
94 #include "llvm/Support/Casting.h"
95 #include "llvm/Support/CodeGen.h"
97 #include "llvm/Support/Compiler.h"
98 #include "llvm/Support/Debug.h"
100 #include "llvm/Support/KnownBits.h"
102 #include "llvm/Support/MathExtras.h"
106 #include <algorithm>
107 #include <cassert>
108 #include <cstdint>
109 #include <cstdlib>
110 #include <iterator>
111 #include <limits>
112 #include <string>
113 #include <tuple>
114 #include <utility>
115 #include <vector>
116 
117 using namespace llvm;
118 using namespace llvm::PatternMatch;
119 
120 #define DEBUG_TYPE "arm-isel"
121 
122 STATISTIC(NumTailCalls, "Number of tail calls");
123 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
124 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
125 STATISTIC(NumConstpoolPromoted,
126  "Number of constants with their storage promoted into constant pools");
127 
128 static cl::opt<bool>
129 ARMInterworking("arm-interworking", cl::Hidden,
130  cl::desc("Enable / disable ARM interworking (for debugging only)"),
131  cl::init(true));
132 
134  "arm-promote-constant", cl::Hidden,
135  cl::desc("Enable / disable promotion of unnamed_addr constants into "
136  "constant pools"),
137  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
139  "arm-promote-constant-max-size", cl::Hidden,
140  cl::desc("Maximum size of constant to promote into a constant pool"),
141  cl::init(64));
143  "arm-promote-constant-max-total", cl::Hidden,
144  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
145  cl::init(128));
146 
148 MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
149  cl::desc("Maximum interleave factor for MVE VLDn to generate."),
150  cl::init(2));
151 
152 // The APCS parameter registers.
153 static const MCPhysReg GPRArgRegs[] = {
154  ARM::R0, ARM::R1, ARM::R2, ARM::R3
155 };
156 
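// addTypeForNEON - Configure legalization actions for a NEON vector type:
// promote loads/stores and bitwise operations to the given promoted types,
// custom-lower lane insert/extract, build_vector and shuffles, and expand the
// operations NEON has no instructions for (divide, remainder, select, etc.).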
157 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
158  MVT PromotedBitwiseVT) {
159  if (VT != PromotedLdStVT) {
160  setOperationAction(ISD::LOAD, VT, Promote);
161  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
162 
163  setOperationAction(ISD::STORE, VT, Promote);
164  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
165  }
166 
167  MVT ElemTy = VT.getVectorElementType();
168  if (ElemTy != MVT::f64)
169  setOperationAction(ISD::SETCC, VT, Custom);
170  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
171  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
172  if (ElemTy == MVT::i32) {
173  setOperationAction(ISD::SINT_TO_FP, VT, Custom);
174  setOperationAction(ISD::UINT_TO_FP, VT, Custom);
175  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
176  setOperationAction(ISD::FP_TO_UINT, VT, Custom);
177  } else {
178  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
179  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
180  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
181  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
182  }
183  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
184  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
185  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
186  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
187  setOperationAction(ISD::SELECT, VT, Expand);
188  setOperationAction(ISD::SELECT_CC, VT, Expand);
189  setOperationAction(ISD::VSELECT, VT, Expand);
190  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
191  if (VT.isInteger()) {
192  setOperationAction(ISD::SHL, VT, Custom);
193  setOperationAction(ISD::SRA, VT, Custom);
194  setOperationAction(ISD::SRL, VT, Custom);
195  }
196 
197  // Promote all bit-wise operations.
198  if (VT.isInteger() && VT != PromotedBitwiseVT) {
199  setOperationAction(ISD::AND, VT, Promote);
200  AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
201  setOperationAction(ISD::OR, VT, Promote);
202  AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
203  setOperationAction(ISD::XOR, VT, Promote);
204  AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
205  }
206 
207  // Neon does not support vector divide/remainder operations.
208  setOperationAction(ISD::SDIV, VT, Expand);
209  setOperationAction(ISD::UDIV, VT, Expand);
210  setOperationAction(ISD::FDIV, VT, Expand);
211  setOperationAction(ISD::SREM, VT, Expand);
212  setOperationAction(ISD::UREM, VT, Expand);
213  setOperationAction(ISD::FREM, VT, Expand);
214  setOperationAction(ISD::SDIVREM, VT, Expand);
215  setOperationAction(ISD::UDIVREM, VT, Expand);
216 
217  if (!VT.isFloatingPoint() &&
218  VT != MVT::v2i64 && VT != MVT::v1i64)
219  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
220  setOperationAction(Opcode, VT, Legal);
221  if (!VT.isFloatingPoint())
222  for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
223  setOperationAction(Opcode, VT, Legal);
224 }
225 
226 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
227  addRegisterClass(VT, &ARM::DPRRegClass);
228  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
229 }
230 
231 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
232  addRegisterClass(VT, &ARM::DPairRegClass);
233  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
234 }
235 
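// setAllExpand - Mark every standard operation on VT as Expand, then restore
// the trivial ones (bitcast, load, store, undef) to Legal below.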
236 void ARMTargetLowering::setAllExpand(MVT VT) {
237  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
238  setOperationAction(Opc, VT, Expand);
239 
240  // We support these really simple operations even on types where all
241  // the actual arithmetic has to be broken down into simpler
242  // operations or turned into library calls.
243  setOperationAction(ISD::BITCAST, VT, Legal);
244  setOperationAction(ISD::LOAD, VT, Legal);
245  setOperationAction(ISD::STORE, VT, Legal);
246  setOperationAction(ISD::UNDEF, VT, Legal);
247 }
248 
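// addAllExtLoads - Apply the same action to the EXTLOAD, ZEXTLOAD and
// SEXTLOAD variants for the given type pair.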
249 void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
250  LegalizeAction Action) {
251  setLoadExtAction(ISD::EXTLOAD, From, To, Action);
252  setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
253  setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
254 }
255 
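// addMVEVectorTypes - Register the 128-bit MVE vector types in the MQPR
// register class and set up their operation actions. Integer vectors are
// always handled; float vectors get full support only when MVE.fp (HasMVEFP)
// is available; v2i64/v2f64 are supported only to bitcast/load/store level,
// and the predicate types (v16i1/v8i1/v4i1) live in VCCR.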
256 void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
257  const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
258 
259  for (auto VT : IntTypes) {
260  addRegisterClass(VT, &ARM::MQPRRegClass);
261  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
262  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
263  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
264  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
265  setOperationAction(ISD::SHL, VT, Custom);
266  setOperationAction(ISD::SRA, VT, Custom);
267  setOperationAction(ISD::SRL, VT, Custom);
268  setOperationAction(ISD::SMIN, VT, Legal);
269  setOperationAction(ISD::SMAX, VT, Legal);
270  setOperationAction(ISD::UMIN, VT, Legal);
271  setOperationAction(ISD::UMAX, VT, Legal);
272  setOperationAction(ISD::ABS, VT, Legal);
273  setOperationAction(ISD::SETCC, VT, Custom);
274  setOperationAction(ISD::MLOAD, VT, Custom);
275  setOperationAction(ISD::MSTORE, VT, Legal);
276  setOperationAction(ISD::CTLZ, VT, Legal);
277  setOperationAction(ISD::CTTZ, VT, Custom);
278  setOperationAction(ISD::BITREVERSE, VT, Legal);
279  setOperationAction(ISD::BSWAP, VT, Legal);
280  setOperationAction(ISD::SADDSAT, VT, Legal);
281  setOperationAction(ISD::UADDSAT, VT, Legal);
282  setOperationAction(ISD::SSUBSAT, VT, Legal);
283  setOperationAction(ISD::USUBSAT, VT, Legal);
284 
285  // No native support for these.
286  setOperationAction(ISD::UDIV, VT, Expand);
287  setOperationAction(ISD::SDIV, VT, Expand);
288  setOperationAction(ISD::UREM, VT, Expand);
289  setOperationAction(ISD::SREM, VT, Expand);
290  setOperationAction(ISD::UDIVREM, VT, Expand);
291  setOperationAction(ISD::SDIVREM, VT, Expand);
292  setOperationAction(ISD::CTPOP, VT, Expand);
293  setOperationAction(ISD::SELECT, VT, Expand);
294  setOperationAction(ISD::SELECT_CC, VT, Expand);
295 
296  // Vector reductions
297  setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
298  setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
299  setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
300  setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
301  setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
302  setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
303  setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
304  setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
305  setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
306 
307  if (!HasMVEFP) {
308  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
309  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
310  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
311  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
312  }
313 
314  // Pre and Post inc are supported on loads and stores
315  for (unsigned im = (unsigned)ISD::PRE_INC;
316  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
317  setIndexedLoadAction(im, VT, Legal);
318  setIndexedStoreAction(im, VT, Legal);
319  setIndexedMaskedLoadAction(im, VT, Legal);
320  setIndexedMaskedStoreAction(im, VT, Legal);
321  }
322  }
323 
324  const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
325  for (auto VT : FloatTypes) {
326  addRegisterClass(VT, &ARM::MQPRRegClass);
327  if (!HasMVEFP)
328  setAllExpand(VT);
329 
330  // These are legal or custom whether we have MVE.fp or not
331  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
332  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
333  setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
334  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
335  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
336  setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
337  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
338  setOperationAction(ISD::SETCC, VT, Custom);
339  setOperationAction(ISD::MLOAD, VT, Custom);
340  setOperationAction(ISD::MSTORE, VT, Legal);
341  setOperationAction(ISD::SELECT, VT, Expand);
342  setOperationAction(ISD::SELECT_CC, VT, Expand);
343 
344  // Pre and Post inc are supported on loads and stores
345  for (unsigned im = (unsigned)ISD::PRE_INC;
346  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
347  setIndexedLoadAction(im, VT, Legal);
348  setIndexedStoreAction(im, VT, Legal);
349  setIndexedMaskedLoadAction(im, VT, Legal);
350  setIndexedMaskedStoreAction(im, VT, Legal);
351  }
352 
353  if (HasMVEFP) {
354  setOperationAction(ISD::FMINNUM, VT, Legal);
355  setOperationAction(ISD::FMAXNUM, VT, Legal);
356  setOperationAction(ISD::FROUND, VT, Legal);
357  setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
358  setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
359  setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
360  setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
361 
362  // No native support for these.
363  setOperationAction(ISD::FDIV, VT, Expand);
364  setOperationAction(ISD::FREM, VT, Expand);
365  setOperationAction(ISD::FSQRT, VT, Expand);
366  setOperationAction(ISD::FSIN, VT, Expand);
367  setOperationAction(ISD::FCOS, VT, Expand);
368  setOperationAction(ISD::FPOW, VT, Expand);
369  setOperationAction(ISD::FLOG, VT, Expand);
370  setOperationAction(ISD::FLOG2, VT, Expand);
371  setOperationAction(ISD::FLOG10, VT, Expand);
372  setOperationAction(ISD::FEXP, VT, Expand);
373  setOperationAction(ISD::FEXP2, VT, Expand);
374  setOperationAction(ISD::FNEARBYINT, VT, Expand);
375  }
376  }
377 
378  // Custom-expand vector reductions that are smaller than the legal size, to
379  // prevent false zero items from being added.
380  setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);
381  setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);
382  setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);
383  setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);
384  setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
385  setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);
386  setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
387  setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);
388 
389  // We 'support' these types up to bitcast/load/store level, regardless of
390  // MVE integer-only / float support. Only FP data processing on the FP
391  // vector types is inhibited at the integer-only level.
392  const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
393  for (auto VT : LongTypes) {
394  addRegisterClass(VT, &ARM::MQPRRegClass);
395  setAllExpand(VT);
396  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
397  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
398  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
399  }
400  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
401 
402  // We can do bitwise operations on v2i64 vectors
403  setOperationAction(ISD::AND, MVT::v2i64, Legal);
404  setOperationAction(ISD::OR, MVT::v2i64, Legal);
405  setOperationAction(ISD::XOR, MVT::v2i64, Legal);
406 
407  // It is legal to extload from v4i8 to v4i16 or v4i32.
408  addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
409  addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
410  addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
411 
412  // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
413  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
414  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
415  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
416  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
417  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
418 
419  // Some truncating stores are legal too.
420  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
421  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
422  setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
423 
424  // Pre and Post inc on these are legal, given the correct extends
425  for (unsigned im = (unsigned)ISD::PRE_INC;
426  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
427  for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
428  setIndexedLoadAction(im, VT, Legal);
429  setIndexedStoreAction(im, VT, Legal);
430  setIndexedMaskedLoadAction(im, VT, Legal);
431  setIndexedMaskedStoreAction(im, VT, Legal);
432  }
433  }
434 
435  // Predicate types
436  const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1};
437  for (auto VT : pTypes) {
438  addRegisterClass(VT, &ARM::VCCRRegClass);
439  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
440  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
441  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
442  setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
443  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
444  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
445  setOperationAction(ISD::SETCC, VT, Custom);
446  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
447  setOperationAction(ISD::LOAD, VT, Custom);
448  setOperationAction(ISD::STORE, VT, Custom);
449  setOperationAction(ISD::TRUNCATE, VT, Custom);
450  setOperationAction(ISD::VSELECT, VT, Expand);
451  setOperationAction(ISD::SELECT, VT, Expand);
452  }
453 }
454 
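// The ARMTargetLowering constructor: selects libcall names and calling
// conventions for the target ABI/OS, registers the legal register classes
// (GPR/SPR/DPR, NEON and MVE vectors), and configures per-operation
// legalization actions based on the subtarget's features.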
455 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
456  const ARMSubtarget &STI)
457  : TargetLowering(TM), Subtarget(&STI) {
458  RegInfo = Subtarget->getRegisterInfo();
459  Itins = Subtarget->getInstrItineraryData();
460 
463 
464  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
465  !Subtarget->isTargetWatchOS()) {
466  bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
467  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
468  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
469  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
470  : CallingConv::ARM_AAPCS);
471  }
472 
473  if (Subtarget->isTargetMachO()) {
474  // Uses VFP for Thumb libfuncs if available.
475  if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
476  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
477  static const struct {
478  const RTLIB::Libcall Op;
479  const char * const Name;
480  const ISD::CondCode Cond;
481  } LibraryCalls[] = {
482  // Single-precision floating-point arithmetic.
483  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
484  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
485  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
486  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
487 
488  // Double-precision floating-point arithmetic.
489  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
490  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
491  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
492  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
493 
494  // Single-precision comparisons.
495  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
496  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
497  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
498  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
499  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
500  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
501  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
502 
503  // Double-precision comparisons.
504  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
505  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
506  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
507  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
508  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
509  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
510  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
511 
512  // Floating-point to integer conversions.
513  // i64 conversions are done via library routines even when generating VFP
514  // instructions, so use the same ones.
515  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
516  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
517  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
518  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
519 
520  // Conversions between floating types.
521  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
522  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
523 
524  // Integer to floating-point conversions.
525  // i64 conversions are done via library routines even when generating VFP
526  // instructions, so use the same ones.
527  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
528  // e.g., __floatunsidf vs. __floatunssidfvfp.
529  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
530  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
531  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
532  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
533  };
534 
535  for (const auto &LC : LibraryCalls) {
536  setLibcallName(LC.Op, LC.Name);
537  if (LC.Cond != ISD::SETCC_INVALID)
538  setCmpLibcallCC(LC.Op, LC.Cond);
539  }
540  }
541  }
542 
543  // These libcalls are not available in 32-bit.
544  setLibcallName(RTLIB::SHL_I128, nullptr);
545  setLibcallName(RTLIB::SRL_I128, nullptr);
546  setLibcallName(RTLIB::SRA_I128, nullptr);
547 
548  // RTLIB
549  if (Subtarget->isAAPCS_ABI() &&
550  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
551  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
552  static const struct {
553  const RTLIB::Libcall Op;
554  const char * const Name;
555  const CallingConv::ID CC;
556  const ISD::CondCode Cond;
557  } LibraryCalls[] = {
558  // Double-precision floating-point arithmetic helper functions
559  // RTABI chapter 4.1.2, Table 2
560  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
561  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
562  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
563  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
564 
565  // Double-precision floating-point comparison helper functions
566  // RTABI chapter 4.1.2, Table 3
567  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
568  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
569  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
570  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
571  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
572  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
573  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
574 
575  // Single-precision floating-point arithmetic helper functions
576  // RTABI chapter 4.1.2, Table 4
577  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
578  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
579  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
580  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
581 
582  // Single-precision floating-point comparison helper functions
583  // RTABI chapter 4.1.2, Table 5
584  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
585  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
586  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
587  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
588  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
589  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
590  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
591 
592  // Floating-point to integer conversions.
593  // RTABI chapter 4.1.2, Table 6
594  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
595  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
596  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
597  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
598  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
599  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
600  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
601  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
602 
603  // Conversions between floating types.
604  // RTABI chapter 4.1.2, Table 7
605  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
606  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
607  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
608 
609  // Integer to floating-point conversions.
610  // RTABI chapter 4.1.2, Table 8
611  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
612  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
613  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
614  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
615  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
616  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
617  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
618  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
619 
620  // Long long helper functions
621  // RTABI chapter 4.2, Table 9
622  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
623  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
624  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
625  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
626 
627  // Integer division functions
628  // RTABI chapter 4.3.1
629  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
630  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
631  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
634  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
635  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
636  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637  };
638 
639  for (const auto &LC : LibraryCalls) {
640  setLibcallName(LC.Op, LC.Name);
641  setLibcallCallingConv(LC.Op, LC.CC);
642  if (LC.Cond != ISD::SETCC_INVALID)
643  setCmpLibcallCC(LC.Op, LC.Cond);
644  }
645 
646  // EABI dependent RTLIB
647  if (TM.Options.EABIVersion == EABI::EABI4 ||
648  TM.Options.EABIVersion == EABI::EABI5) {
649  static const struct {
650  const RTLIB::Libcall Op;
651  const char *const Name;
652  const CallingConv::ID CC;
653  const ISD::CondCode Cond;
654  } MemOpsLibraryCalls[] = {
655  // Memory operations
656  // RTABI chapter 4.3.4
658  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
659  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
660  };
661 
662  for (const auto &LC : MemOpsLibraryCalls) {
663  setLibcallName(LC.Op, LC.Name);
664  setLibcallCallingConv(LC.Op, LC.CC);
665  if (LC.Cond != ISD::SETCC_INVALID)
666  setCmpLibcallCC(LC.Op, LC.Cond);
667  }
668  }
669  }
670 
671  if (Subtarget->isTargetWindows()) {
672  static const struct {
673  const RTLIB::Libcall Op;
674  const char * const Name;
675  const CallingConv::ID CC;
676  } LibraryCalls[] = {
677  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
678  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
679  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
680  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
681  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
682  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
683  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
684  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
685  };
686 
687  for (const auto &LC : LibraryCalls) {
688  setLibcallName(LC.Op, LC.Name);
689  setLibcallCallingConv(LC.Op, LC.CC);
690  }
691  }
692 
693  // Use divmod compiler-rt calls for iOS 5.0 and later.
694  if (Subtarget->isTargetMachO() &&
695  !(Subtarget->isTargetIOS() &&
696  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
697  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
698  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
699  }
700 
701  // The half <-> float conversion functions are always soft-float on
702  // non-watchos platforms, but are needed for some targets which use a
703  // hard-float calling convention by default.
704  if (!Subtarget->isTargetWatchABI()) {
705  if (Subtarget->isAAPCS_ABI()) {
706  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
707  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
708  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
709  } else {
710  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
711  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
712  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
713  }
714  }
715 
716  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
717  // a __gnu_ prefix (which is the default).
718  if (Subtarget->isTargetAEABI()) {
719  static const struct {
720  const RTLIB::Libcall Op;
721  const char * const Name;
722  const CallingConv::ID CC;
723  } LibraryCalls[] = {
724  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
725  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
726  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
727  };
728 
729  for (const auto &LC : LibraryCalls) {
730  setLibcallName(LC.Op, LC.Name);
731  setLibcallCallingConv(LC.Op, LC.CC);
732  }
733  }
734 
735  if (Subtarget->isThumb1Only())
736  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
737  else
738  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
739 
740  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
741  Subtarget->hasFPRegs()) {
742  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
743  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
744  if (!Subtarget->hasVFP2Base())
745  setAllExpand(MVT::f32);
746  if (!Subtarget->hasFP64())
747  setAllExpand(MVT::f64);
748  }
749 
750  if (Subtarget->hasFullFP16()) {
751  addRegisterClass(MVT::f16, &ARM::HPRRegClass);
754 
757  }
758 
759  if (Subtarget->hasBF16()) {
760  addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
761  setAllExpand(MVT::bf16);
762  if (!Subtarget->hasFullFP16())
763  setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
764  }
765 
766  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
767  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
768  setTruncStoreAction(VT, InnerVT, Expand);
769  addAllExtLoads(VT, InnerVT, Expand);
770  }
771 
776 
778  }
779 
782 
785 
786  if (Subtarget->hasMVEIntegerOps())
787  addMVEVectorTypes(Subtarget->hasMVEFloatOps());
788 
789  // Combine low-overhead loop intrinsics so that we can lower i1 types.
790  if (Subtarget->hasLOB()) {
793  }
794 
795  if (Subtarget->hasNEON()) {
796  addDRTypeForNEON(MVT::v2f32);
797  addDRTypeForNEON(MVT::v8i8);
798  addDRTypeForNEON(MVT::v4i16);
799  addDRTypeForNEON(MVT::v2i32);
800  addDRTypeForNEON(MVT::v1i64);
801 
802  addQRTypeForNEON(MVT::v4f32);
803  addQRTypeForNEON(MVT::v2f64);
804  addQRTypeForNEON(MVT::v16i8);
805  addQRTypeForNEON(MVT::v8i16);
806  addQRTypeForNEON(MVT::v4i32);
807  addQRTypeForNEON(MVT::v2i64);
808 
809  if (Subtarget->hasFullFP16()) {
810  addQRTypeForNEON(MVT::v8f16);
811  addDRTypeForNEON(MVT::v4f16);
812  }
813 
814  if (Subtarget->hasBF16()) {
815  addQRTypeForNEON(MVT::v8bf16);
816  addDRTypeForNEON(MVT::v4bf16);
817  }
818  }
819 
820  if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
821  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
822  // none of Neon, MVE or VFP supports any arithmetic operations on it.
826  // FIXME: Code duplication: FDIV and FREM are expanded always, see
827  // ARMTargetLowering::addTypeForNEON method for details.
830  // FIXME: Create unittest.
831  // In other words, find a way to detect when "copysign" appears in the DAG
832  // with vector operands.
834  // FIXME: Code duplication: SETCC has custom operation action, see
835  // ARMTargetLowering::addTypeForNEON method for details.
837  // FIXME: Create unittest for FNEG and for FABS.
849  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
856  }
857 
858  if (Subtarget->hasNEON()) {
859  // The same applies to v4f32, but keep in mind that vadd, vsub and vmul are
860  // natively supported for v4f32.
875 
876  // Mark v2f32 intrinsics.
891 
892  // Neon does not support some operations on v1i64 and v2i64 types.
894  // Custom handling for some quad-vector types to detect VMULL.
898  // Custom handling for some vector types to avoid expensive expansions
903  // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
904  // a destination type that is wider than the source, nor does
905  // it have a FP_TO_[SU]INT instruction with a narrower destination than
906  // source.
915 
918 
919  // NEON does not have single instruction CTPOP for vectors with element
920  // types wider than 8 bits. However, custom lowering can leverage the
921  // v8i8/v16i8 vcnt instruction.
928 
931 
932  // NEON does not have single instruction CTTZ for vectors.
937 
942 
947 
952 
953  // NEON only has FMA instructions as of VFP4.
954  if (!Subtarget->hasVFP4Base()) {
957  }
958 
966 
967  // It is legal to extload from v4i8 to v4i16 or v4i32.
969  MVT::v2i32}) {
974  }
975  }
976  }
977 
978  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
994  }
995  if (Subtarget->hasMVEIntegerOps()) {
1003  }
1004 
1005  if (!Subtarget->hasFP64()) {
1006  // When targeting a floating-point unit with only single-precision
1007  // operations, f64 is legal for the few double-precision instructions which
1008  // are present. However, no double-precision operations other than moves,
1009  // loads and stores are provided by the hardware.
1046  }
1047 
1048  if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1051  if (Subtarget->hasFullFP16()) {
1054  }
1055  }
1056 
1057  if (!Subtarget->hasFP16()) {
1060  }
1061 
1063 
1064  // ARM does not have floating-point extending loads.
1065  for (MVT VT : MVT::fp_valuetypes()) {
1068  }
1069 
1070  // ... or truncating stores
1074 
1075  // ARM does not have i1 sign extending load.
1076  for (MVT VT : MVT::integer_valuetypes())
1078 
1079  // ARM supports all 4 flavors of integer indexed load / store.
1080  if (!Subtarget->isThumb1Only()) {
1081  for (unsigned im = (unsigned)ISD::PRE_INC;
1082  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1091  }
1092  } else {
1093  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1094  setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
1095  setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
1096  }
1097 
1102 
1105  if (Subtarget->hasDSP()) {
1110  }
1111  if (Subtarget->hasBaseDSP()) {
1114  }
1115 
1116  // i64 operation support.
1119  if (Subtarget->isThumb1Only()) {
1122  }
1123  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1124  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1126 
1136 
1137  // MVE lowers 64 bit shifts to lsll and lsrl
1138  // assuming that ISD::SRL and SRA of i64 are already marked custom
1139  if (Subtarget->hasMVEIntegerOps())
1141 
1142  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1143  if (Subtarget->isThumb1Only()) {
1147  }
1148 
1149  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1151 
1152  // ARM does not have ROTL.
1154  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1157  }
1160  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1163  }
1164 
1165  // @llvm.readcyclecounter requires the Performance Monitors extension.
1166  // Default to the 0 expansion on unsupported platforms.
1167  // FIXME: Technically there are older ARM CPUs that have
1168  // implementation-specific ways of obtaining this information.
1169  if (Subtarget->hasPerfMon())
1171 
1172  // Only ARMv6 has BSWAP.
1173  if (!Subtarget->hasV6Ops())
1175 
1176  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1177  : Subtarget->hasDivideInARMMode();
1178  if (!hasDivide) {
1179  // These are expanded into libcalls if the cpu doesn't have HW divider.
1182  }
1183 
1184  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1187 
1190  }
1191 
1194 
1195  // Register based DivRem for AEABI (RTABI 4.2)
1196  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1197  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1198  Subtarget->isTargetWindows()) {
1201  HasStandaloneRem = false;
1202 
1203  if (Subtarget->isTargetWindows()) {
1204  const struct {
1205  const RTLIB::Libcall Op;
1206  const char * const Name;
1207  const CallingConv::ID CC;
1208  } LibraryCalls[] = {
1209  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1210  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1211  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1212  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1213 
1214  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1215  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1216  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1217  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1218  };
1219 
1220  for (const auto &LC : LibraryCalls) {
1221  setLibcallName(LC.Op, LC.Name);
1222  setLibcallCallingConv(LC.Op, LC.CC);
1223  }
1224  } else {
1225  const struct {
1226  const RTLIB::Libcall Op;
1227  const char * const Name;
1228  const CallingConv::ID CC;
1229  } LibraryCalls[] = {
1230  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1231  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1232  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1233  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1234 
1235  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1236  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1237  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1238  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1239  };
1240 
1241  for (const auto &LC : LibraryCalls) {
1242  setLibcallName(LC.Op, LC.Name);
1243  setLibcallCallingConv(LC.Op, LC.CC);
1244  }
1245  }
1246 
1251  } else {
1254  }
1255 
1256  if (Subtarget->getTargetTriple().isOSMSVCRT()) {
1257  // MSVCRT doesn't have powi; fall back to pow
1258  setLibcallName(RTLIB::POWI_F32, nullptr);
1259  setLibcallName(RTLIB::POWI_F64, nullptr);
1260  }
1261 
1266 
1269 
1270  // Use the default implementation.
1277 
1278  if (Subtarget->isTargetWindows())
1280  else
1282 
1283  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1284  // the default expansion.
1285  InsertFencesForAtomic = false;
1286  if (Subtarget->hasAnyDataBarrier() &&
1287  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1288  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1289  // to ldrex/strex loops already.
1291  if (!Subtarget->isThumb() || !Subtarget->isMClass())
1293 
1294  // On v8, we have particularly efficient implementations of atomic fences
1295  // if they can be combined with nearby atomic loads and stores.
1296  if (!Subtarget->hasAcquireRelease() ||
1297  getTargetMachine().getOptLevel() == 0) {
1298  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1299  InsertFencesForAtomic = true;
1300  }
1301  } else {
1302  // If there's anything we can use as a barrier, go through custom lowering
1303  // for ATOMIC_FENCE.
1304  // If the target has DMB in Thumb mode, fences can be inserted.
1305  if (Subtarget->hasDataBarrier())
1306  InsertFencesForAtomic = true;
1307 
1309  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1310 
1311  // Set them all for expansion, which will force libcalls.
1324  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1325  // Unordered/Monotonic case.
1326  if (!InsertFencesForAtomic) {
1329  }
1330  }
1331 
1333 
1334  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1335  if (!Subtarget->hasV6Ops()) {
1338  }
1340 
1341  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1342  !Subtarget->isThumb1Only()) {
1343  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1344  // iff target supports vfp2.
1348  }
1349 
1350  // We want to custom lower some of our intrinsics.
1355  if (Subtarget->useSjLjEH())
1356  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1357 
1367  if (Subtarget->hasFullFP16()) {
1371  }
1372 
1374 
1377  if (Subtarget->hasFullFP16())
1382 
1383  // We don't support sin/cos/fmod/copysign/pow
1392  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1393  !Subtarget->isThumb1Only()) {
1396  }
1399 
1400  if (!Subtarget->hasVFP4Base()) {
1403  }
1404 
1405  // Various VFP goodness
1406  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1407  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1408  if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1411  }
1412 
1413  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1414  if (!Subtarget->hasFP16()) {
1417  }
1418 
1419  // Strict floating-point comparisons need custom lowering.
1426  }
1427 
1428  // Use __sincos_stret if available.
1429  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1430  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1433  }
1434 
1435  // FP-ARMv8 implements a lot of rounding-like FP operations.
1436  if (Subtarget->hasFPARMv8Base()) {
1445  if (Subtarget->hasNEON()) {
1450  }
1451 
1452  if (Subtarget->hasFP64()) {
1461  }
1462  }
1463 
1464  // FP16 operations often need to be promoted in order to call library functions.
1465  if (Subtarget->hasFullFP16()) {
1478 
1480  }
1481 
1482  if (Subtarget->hasNEON()) {
1483  // vmin and vmax aren't available in a scalar form, so we can use
1484  // a NEON instruction with an undef lane instead. This has a performance
1485  // penalty on some cores, so we don't do this unless we have been
1486  // asked to by the core tuning model.
1487  if (Subtarget->useNEONForSinglePrecisionFP()) {
1492  }
1497 
1498  if (Subtarget->hasFullFP16()) {
1503 
1508  }
1509  }
1510 
1511  // We have target-specific dag combine patterns for the following nodes:
1512  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1519 
1520  if (Subtarget->hasMVEIntegerOps())
1522 
1523  if (Subtarget->hasV6Ops())
1525  if (Subtarget->isThumb1Only())
1527 
1529 
1530  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1531  !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1533  else
1535 
1536  //// temporary - rewrite interface to use type
1537  MaxStoresPerMemset = 8;
1539  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1541  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1543 
1544  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1545  // are at least 4 bytes aligned.
1547 
1548  // Prefer likely predicted branches to selects on out-of-order cores.
1549  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1550 
1551  setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1552 
1553  setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1554 
1555  if (Subtarget->isThumb() || Subtarget->isThumb2())
1557 }
1558 
1559 bool ARMTargetLowering::useSoftFloat() const {
1560  return Subtarget->useSoftFloat();
1561 }
1562 
1563 // FIXME: It might make sense to define the representative register class as the
1564 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1565 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1566 // SPR's representative would be DPR_VFP2. This should work well if register
1567 // pressure tracking were modified such that a register use would increment the
1568 // pressure of the register class's representative and all of its super
1569 // classes' representatives transitively. We have not implemented this because
1570 // of the difficulty prior to coalescing of modeling operand register classes
1571 // due to the common occurrence of cross class copies and subregister insertions
1572 // and extractions.
1573 std::pair<const TargetRegisterClass *, uint8_t>
1574 ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1575  MVT VT) const {
1576  const TargetRegisterClass *RRC = nullptr;
1577  uint8_t Cost = 1;
1578  switch (VT.SimpleTy) {
1579  default:
1580  return TargetLowering::findRepresentativeClass(TRI, VT);
1581  // Use DPR as representative register class for all floating point
1582  // and vector types. There are 32 SPR registers and 32 DPR registers, so
1583  // the cost is 1 for both f32 and f64.
1584  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1585  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1586  RRC = &ARM::DPRRegClass;
1587  // When NEON is used for SP, only half of the register file is available
1588  // because operations that define both SP and DP results will be constrained
1589  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1590  // coalescing by double-counting the SP regs. See the FIXME above.
1591  if (Subtarget->useNEONForSinglePrecisionFP())
1592  Cost = 2;
1593  break;
1594  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1595  case MVT::v4f32: case MVT::v2f64:
1596  RRC = &ARM::DPRRegClass;
1597  Cost = 2;
1598  break;
1599  case MVT::v4i64:
1600  RRC = &ARM::DPRRegClass;
1601  Cost = 4;
1602  break;
1603  case MVT::v8i64:
1604  RRC = &ARM::DPRRegClass;
1605  Cost = 8;
1606  break;
1607  }
1608  return std::make_pair(RRC, Cost);
1609 }
1610 
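// getTargetNodeName - Map each ARM-specific SelectionDAG node opcode
// (ARMISD::*) to its textual name for DAG dumps and debug output.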
1611 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1612  switch ((ARMISD::NodeType)Opcode) {
1613  case ARMISD::FIRST_NUMBER: break;
1614  case ARMISD::Wrapper: return "ARMISD::Wrapper";
1615  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1616  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1617  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1618  case ARMISD::CALL: return "ARMISD::CALL";
1619  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1620  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1621  case ARMISD::tSECALL: return "ARMISD::tSECALL";
1622  case ARMISD::BRCOND: return "ARMISD::BRCOND";
1623  case ARMISD::BR_JT: return "ARMISD::BR_JT";
1624  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1625  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1626  case ARMISD::SERET_FLAG: return "ARMISD::SERET_FLAG";
1627  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1628  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1629  case ARMISD::CMP: return "ARMISD::CMP";
1630  case ARMISD::CMN: return "ARMISD::CMN";
1631  case ARMISD::CMPZ: return "ARMISD::CMPZ";
1632  case ARMISD::CMPFP: return "ARMISD::CMPFP";
1633  case ARMISD::CMPFPE: return "ARMISD::CMPFPE";
1634  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1635  case ARMISD::CMPFPEw0: return "ARMISD::CMPFPEw0";
1636  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1637  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1638 
1639  case ARMISD::CMOV: return "ARMISD::CMOV";
1640  case ARMISD::SUBS: return "ARMISD::SUBS";
1641 
1642  case ARMISD::SSAT: return "ARMISD::SSAT";
1643  case ARMISD::USAT: return "ARMISD::USAT";
1644 
1645  case ARMISD::ASRL: return "ARMISD::ASRL";
1646  case ARMISD::LSRL: return "ARMISD::LSRL";
1647  case ARMISD::LSLL: return "ARMISD::LSLL";
1648 
1649  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1650  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1651  case ARMISD::RRX: return "ARMISD::RRX";
1652 
1653  case ARMISD::ADDC: return "ARMISD::ADDC";
1654  case ARMISD::ADDE: return "ARMISD::ADDE";
1655  case ARMISD::SUBC: return "ARMISD::SUBC";
1656  case ARMISD::SUBE: return "ARMISD::SUBE";
1657  case ARMISD::LSLS: return "ARMISD::LSLS";
1658 
1659  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1660  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1661  case ARMISD::VMOVhr: return "ARMISD::VMOVhr";
1662  case ARMISD::VMOVrh: return "ARMISD::VMOVrh";
1663  case ARMISD::VMOVSR: return "ARMISD::VMOVSR";
1664 
1665  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1666  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1667  case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1668 
1669  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1670 
1671  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1672 
1673  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1674 
1675  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1676 
1677  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1678 
1679  case ARMISD::LDRD: return "ARMISD::LDRD";
1680  case ARMISD::STRD: return "ARMISD::STRD";
1681 
1682  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1683  case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1684 
1685  case ARMISD::PREDICATE_CAST: return "ARMISD::PREDICATE_CAST";
1686  case ARMISD::VECTOR_REG_CAST: return "ARMISD::VECTOR_REG_CAST";
1687  case ARMISD::VCMP: return "ARMISD::VCMP";
1688  case ARMISD::VCMPZ: return "ARMISD::VCMPZ";
1689  case ARMISD::VTST: return "ARMISD::VTST";
1690 
1691  case ARMISD::VSHLs: return "ARMISD::VSHLs";
1692  case ARMISD::VSHLu: return "ARMISD::VSHLu";
1693  case ARMISD::VSHLIMM: return "ARMISD::VSHLIMM";
1694  case ARMISD::VSHRsIMM: return "ARMISD::VSHRsIMM";
1695  case ARMISD::VSHRuIMM: return "ARMISD::VSHRuIMM";
1696  case ARMISD::VRSHRsIMM: return "ARMISD::VRSHRsIMM";
1697  case ARMISD::VRSHRuIMM: return "ARMISD::VRSHRuIMM";
1698  case ARMISD::VRSHRNIMM: return "ARMISD::VRSHRNIMM";
1699  case ARMISD::VQSHLsIMM: return "ARMISD::VQSHLsIMM";
1700  case ARMISD::VQSHLuIMM: return "ARMISD::VQSHLuIMM";
1701  case ARMISD::VQSHLsuIMM: return "ARMISD::VQSHLsuIMM";
1702  case ARMISD::VQSHRNsIMM: return "ARMISD::VQSHRNsIMM";
1703  case ARMISD::VQSHRNuIMM: return "ARMISD::VQSHRNuIMM";
1704  case ARMISD::VQSHRNsuIMM: return "ARMISD::VQSHRNsuIMM";
1705  case ARMISD::VQRSHRNsIMM: return "ARMISD::VQRSHRNsIMM";
1706  case ARMISD::VQRSHRNuIMM: return "ARMISD::VQRSHRNuIMM";
1707  case ARMISD::VQRSHRNsuIMM: return "ARMISD::VQRSHRNsuIMM";
1708  case ARMISD::VSLIIMM: return "ARMISD::VSLIIMM";
1709  case ARMISD::VSRIIMM: return "ARMISD::VSRIIMM";
1710  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1711  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1712  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1713  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1714  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1715  case ARMISD::VDUP: return "ARMISD::VDUP";
1716  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1717  case ARMISD::VEXT: return "ARMISD::VEXT";
1718  case ARMISD::VREV64: return "ARMISD::VREV64";
1719  case ARMISD::VREV32: return "ARMISD::VREV32";
1720  case ARMISD::VREV16: return "ARMISD::VREV16";
1721  case ARMISD::VZIP: return "ARMISD::VZIP";
1722  case ARMISD::VUZP: return "ARMISD::VUZP";
1723  case ARMISD::VTRN: return "ARMISD::VTRN";
1724  case ARMISD::VTBL1: return "ARMISD::VTBL1";
1725  case ARMISD::VTBL2: return "ARMISD::VTBL2";
1726  case ARMISD::VMOVN: return "ARMISD::VMOVN";
1727  case ARMISD::VQMOVNs: return "ARMISD::VQMOVNs";
1728  case ARMISD::VQMOVNu: return "ARMISD::VQMOVNu";
1729  case ARMISD::VCVTN: return "ARMISD::VCVTN";
1730  case ARMISD::VCVTL: return "ARMISD::VCVTL";
1731  case ARMISD::VMULLs: return "ARMISD::VMULLs";
1732  case ARMISD::VMULLu: return "ARMISD::VMULLu";
1733  case ARMISD::VQDMULH: return "ARMISD::VQDMULH";
1734  case ARMISD::VADDVs: return "ARMISD::VADDVs";
1735  case ARMISD::VADDVu: return "ARMISD::VADDVu";
1736  case ARMISD::VADDVps: return "ARMISD::VADDVps";
1737  case ARMISD::VADDVpu: return "ARMISD::VADDVpu";
1738  case ARMISD::VADDLVs: return "ARMISD::VADDLVs";
1739  case ARMISD::VADDLVu: return "ARMISD::VADDLVu";
1740  case ARMISD::VADDLVAs: return "ARMISD::VADDLVAs";
1741  case ARMISD::VADDLVAu: return "ARMISD::VADDLVAu";
1742  case ARMISD::VADDLVps: return "ARMISD::VADDLVps";
1743  case ARMISD::VADDLVpu: return "ARMISD::VADDLVpu";
1744  case ARMISD::VADDLVAps: return "ARMISD::VADDLVAps";
1745  case ARMISD::VADDLVApu: return "ARMISD::VADDLVApu";
1746  case ARMISD::VMLAVs: return "ARMISD::VMLAVs";
1747  case ARMISD::VMLAVu: return "ARMISD::VMLAVu";
1748  case ARMISD::VMLAVps: return "ARMISD::VMLAVps";
1749  case ARMISD::VMLAVpu: return "ARMISD::VMLAVpu";
1750  case ARMISD::VMLALVs: return "ARMISD::VMLALVs";
1751  case ARMISD::VMLALVu: return "ARMISD::VMLALVu";
1752  case ARMISD::VMLALVps: return "ARMISD::VMLALVps";
1753  case ARMISD::VMLALVpu: return "ARMISD::VMLALVpu";
1754  case ARMISD::VMLALVAs: return "ARMISD::VMLALVAs";
1755  case ARMISD::VMLALVAu: return "ARMISD::VMLALVAu";
1756  case ARMISD::VMLALVAps: return "ARMISD::VMLALVAps";
1757  case ARMISD::VMLALVApu: return "ARMISD::VMLALVApu";
1758  case ARMISD::VMINVu: return "ARMISD::VMINVu";
1759  case ARMISD::VMINVs: return "ARMISD::VMINVs";
1760  case ARMISD::VMAXVu: return "ARMISD::VMAXVu";
1761  case ARMISD::VMAXVs: return "ARMISD::VMAXVs";
1762  case ARMISD::UMAAL: return "ARMISD::UMAAL";
1763  case ARMISD::UMLAL: return "ARMISD::UMLAL";
1764  case ARMISD::SMLAL: return "ARMISD::SMLAL";
1765  case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1766  case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1767  case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1768  case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1769  case ARMISD::SMULWB: return "ARMISD::SMULWB";
1770  case ARMISD::SMULWT: return "ARMISD::SMULWT";
1771  case ARMISD::SMLALD: return "ARMISD::SMLALD";
1772  case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1773  case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1774  case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1775  case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
1776  case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
1777  case ARMISD::QADD16b: return "ARMISD::QADD16b";
1778  case ARMISD::QSUB16b: return "ARMISD::QSUB16b";
1779  case ARMISD::QADD8b: return "ARMISD::QADD8b";
1780  case ARMISD::QSUB8b: return "ARMISD::QSUB8b";
1781  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1782  case ARMISD::BFI: return "ARMISD::BFI";
1783  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1784  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1785  case ARMISD::VBSP: return "ARMISD::VBSP";
1786  case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1787  case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1788  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1789  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1790  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1791  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1792  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1793  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1794  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1795  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1796  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1797  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1798  case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1799  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1800  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1801  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1802  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1803  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1804  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1805  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1806  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1807  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1808  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1809  case ARMISD::WLS: return "ARMISD::WLS";
1810  case ARMISD::WLSSETUP: return "ARMISD::WLSSETUP";
1811  case ARMISD::LE: return "ARMISD::LE";
1812  case ARMISD::LOOP_DEC: return "ARMISD::LOOP_DEC";
1813  case ARMISD::CSINV: return "ARMISD::CSINV";
1814  case ARMISD::CSNEG: return "ARMISD::CSNEG";
1815  case ARMISD::CSINC: return "ARMISD::CSINC";
1816  }
1817  return nullptr;
1818 }
1819 
1820 EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1821  EVT VT) const {
1822  if (!VT.isVector())
1823  return getPointerTy(DL);
1824 
1825  // MVE has a predicate register.
1826  if (Subtarget->hasMVEIntegerOps() &&
1827  (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8))
1828  return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1829  return VT.changeVectorElementTypeToInteger();
1830 }
1831 
1832 /// getRegClassFor - Return the register class that should be used for the
1833 /// specified value type.
1834 const TargetRegisterClass *
1835 ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1836  (void)isDivergent;
1837  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1838  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1839  // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1840  // MVE Q registers.
1841  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1842  if (VT == MVT::v4i64)
1843  return &ARM::QQPRRegClass;
1844  if (VT == MVT::v8i64)
1845  return &ARM::QQQQPRRegClass;
1846  }
1847  return TargetLowering::getRegClassFor(VT);
1848 }
1849 
1850 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1851 // source/dest is aligned and the copy size is large enough. We therefore want
1852 // to align such objects passed to memory intrinsics.
1853 bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1854  unsigned &PrefAlign) const {
1855  if (!isa<MemIntrinsic>(CI))
1856  return false;
1857  MinSize = 8;
1858  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1859  // cycle faster than 4-byte aligned LDM.
1860  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1861  return true;
1862 }
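// Example (illustrative; the core choices are hypothetical): for a call to
// llvm.memcpy on an ARMv7-A core (has V6 ops, not M-class) this reports
// MinSize = 8 and PrefAlign = 8, so callers may raise the alignment of small
// copy buffers to 8 bytes; on an M-class core PrefAlign stays at 4.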
1863 
1864 // Create a fast isel object.
1865 FastISel *
1866 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1867  const TargetLibraryInfo *libInfo) const {
1868  return ARM::createFastISel(funcInfo, libInfo);
1869 }
1870 
1871 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1872  unsigned NumVals = N->getNumValues();
1873  if (!NumVals)
1874  return Sched::RegPressure;
1875 
1876  for (unsigned i = 0; i != NumVals; ++i) {
1877  EVT VT = N->getValueType(i);
1878  if (VT == MVT::Glue || VT == MVT::Other)
1879  continue;
1880  if (VT.isFloatingPoint() || VT.isVector())
1881  return Sched::ILP;
1882  }
1883 
1884  if (!N->isMachineOpcode())
1885  return Sched::RegPressure;
1886 
1887  // Loads are scheduled for latency even if the instruction itinerary
1888  // is not available.
1889  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1890  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1891 
1892  if (MCID.getNumDefs() == 0)
1893  return Sched::RegPressure;
1894  if (!Itins->isEmpty() &&
1895  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1896  return Sched::ILP;
1897 
1898  return Sched::RegPressure;
1899 }
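// Example (illustrative): a node producing a floating-point or vector value,
// e.g. an ISD::FADD of two f32 operands, is scheduled for ILP, while a plain
// i32 ISD::ADD whose machine instruction defines nothing or has no itinerary
// data falls back to register-pressure scheduling.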
1900 
1901 //===----------------------------------------------------------------------===//
1902 // Lowering Code
1903 //===----------------------------------------------------------------------===//
1904 
1905 static bool isSRL16(const SDValue &Op) {
1906  if (Op.getOpcode() != ISD::SRL)
1907  return false;
1908  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1909  return Const->getZExtValue() == 16;
1910  return false;
1911 }
1912 
1913 static bool isSRA16(const SDValue &Op) {
1914  if (Op.getOpcode() != ISD::SRA)
1915  return false;
1916  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1917  return Const->getZExtValue() == 16;
1918  return false;
1919 }
1920 
1921 static bool isSHL16(const SDValue &Op) {
1922  if (Op.getOpcode() != ISD::SHL)
1923  return false;
1924  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1925  return Const->getZExtValue() == 16;
1926  return false;
1927 }
1928 
1929 // Check for a signed 16-bit value. We special case SRA because it keeps
1930 // things simpler when also looking for SRAs that aren't sign extending a
1931 // smaller value. Without the check, we'd need to take extra care with the
1932 // checking order for some operations.
1933 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1934  if (isSRA16(Op))
1935  return isSHL16(Op.getOperand(0));
1936  return DAG.ComputeNumSignBits(Op) == 17;
1937 }
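// Example (illustrative; %x is a hypothetical i32 value) of the DAG shape the
// helpers above recognise:
//   t1: i32 = shl %x, Constant:i32<16>
//   t2: i32 = sra t1, Constant:i32<16>
// isSHL16(t1) and isSRA16(t2) hold, and isS16(t2, DAG) succeeds either through
// the SRA special case or because t2 carries 17 known sign bits, i.e. the
// usual "16-bit value sign-extended in a 32-bit register" idiom that the
// SMUL/SMLA-style combines elsewhere in this file look for.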
1938 
1939 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1940 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1941  switch (CC) {
1942  default: llvm_unreachable("Unknown condition code!");
1943  case ISD::SETNE: return ARMCC::NE;
1944  case ISD::SETEQ: return ARMCC::EQ;
1945  case ISD::SETGT: return ARMCC::GT;
1946  case ISD::SETGE: return ARMCC::GE;
1947  case ISD::SETLT: return ARMCC::LT;
1948  case ISD::SETLE: return ARMCC::LE;
1949  case ISD::SETUGT: return ARMCC::HI;
1950  case ISD::SETUGE: return ARMCC::HS;
1951  case ISD::SETULT: return ARMCC::LO;
1952  case ISD::SETULE: return ARMCC::LS;
1953  }
1954 }
1955 
1956 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1957 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1958  ARMCC::CondCodes &CondCode2) {
1959  CondCode2 = ARMCC::AL;
1960  switch (CC) {
1961  default: llvm_unreachable("Unknown FP condition!");
1962  case ISD::SETEQ:
1963  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
1964  case ISD::SETGT:
1965  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1966  case ISD::SETGE:
1967  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1968  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1969  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1970  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
1971  case ISD::SETO: CondCode = ARMCC::VC; break;
1972  case ISD::SETUO: CondCode = ARMCC::VS; break;
1973  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
1974  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1975  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1976  case ISD::SETLT:
1977  case ISD::SETULT: CondCode = ARMCC::LT; break;
1978  case ISD::SETLE:
1979  case ISD::SETULE: CondCode = ARMCC::LE; break;
1980  case ISD::SETNE:
1981  case ISD::SETUNE: CondCode = ARMCC::NE; break;
1982  }
1983 }
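// Example (illustrative) of the two-condition case: SETONE ("ordered and not
// equal") has no single ARM condition code, so it is lowered as CondCode = MI
// with CondCode2 = GT, and callers emit a second predicated compare/branch
// whenever CondCode2 != ARMCC::AL.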
1984 
1985 //===----------------------------------------------------------------------===//
1986 // Calling Convention Implementation
1987 //===----------------------------------------------------------------------===//
1988 
1989 /// getEffectiveCallingConv - Get the effective calling convention, taking into
1990 /// account presence of floating point hardware and calling convention
1991 /// limitations, such as support for variadic functions.
1993 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1994  bool isVarArg) const {
1995  switch (CC) {
1996  default:
1997  report_fatal_error("Unsupported calling convention");
1999  case CallingConv::ARM_APCS:
2000  case CallingConv::GHC:
2002  return CC;
2006  case CallingConv::Swift:
2008  case CallingConv::C:
2009  if (!Subtarget->isAAPCS_ABI())
2010  return CallingConv::ARM_APCS;
2011  else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
2012  getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2013  !isVarArg)
2015  else
2016  return CallingConv::ARM_AAPCS;
2017  case CallingConv::Fast:
2019  if (!Subtarget->isAAPCS_ABI()) {
2020  if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2021  return CallingConv::Fast;
2022  return CallingConv::ARM_APCS;
2023  } else if (Subtarget->hasVFP2Base() &&
2024  !Subtarget->isThumb1Only() && !isVarArg)
2026  else
2027  return CallingConv::ARM_AAPCS;
2028  }
2029 }
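// Worked example (illustrative; the target configuration is hypothetical): a
// non-variadic CallingConv::C call on an AAPCS target with a VFP2 base
// register file, ARM/Thumb2 mode and FloatABI::Hard resolves to
// CallingConv::ARM_AAPCS_VFP; the same call with a soft-float ABI resolves to
// ARM_AAPCS, and on a pre-AAPCS (APCS) target it resolves to ARM_APCS.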
2030 
2031 CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2032  bool isVarArg) const {
2033  return CCAssignFnForNode(CC, false, isVarArg);
2034 }
2035 
2036 CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
2037  bool isVarArg) const {
2038  return CCAssignFnForNode(CC, true, isVarArg);
2039 }
2040 
2041 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2042 /// CallingConvention.
2043 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2044  bool Return,
2045  bool isVarArg) const {
2046  switch (getEffectiveCallingConv(CC, isVarArg)) {
2047  default:
2048  report_fatal_error("Unsupported calling convention");
2049  case CallingConv::ARM_APCS:
2050  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2052  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2054  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2055  case CallingConv::Fast:
2056  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2057  case CallingConv::GHC:
2058  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2060  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2062  return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2063  }
2064 }
2065 
2066 SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2067  MVT LocVT, MVT ValVT, SDValue Val) const {
2068  Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2069  Val);
2070  if (Subtarget->hasFullFP16()) {
2071  Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2072  } else {
2073  Val = DAG.getNode(ISD::TRUNCATE, dl,
2074  MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2075  Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2076  }
2077  return Val;
2078 }
2079 
2080 SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2081  MVT LocVT, MVT ValVT,
2082  SDValue Val) const {
2083  if (Subtarget->hasFullFP16()) {
2084  Val = DAG.getNode(ARMISD::VMOVrh, dl,
2085  MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2086  } else {
2087  Val = DAG.getNode(ISD::BITCAST, dl,
2088  MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2089  Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2090  MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2091  }
2092  return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2093 }
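// Illustrative node sequences (a sketch assuming ValVT == f16 and LocVT == f32):
//   with +fullfp16:    f16 --VMOVrh--> i32 --bitcast--> f32
//   without fullfp16:  f16 --bitcast--> i16 --zero_extend--> i32 --bitcast--> f32
// MoveToHPR above performs the inverse transformation on the receiving side.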
2094 
2095 /// LowerCallResult - Lower the result values of a call into the
2096 /// appropriate copies out of appropriate physical registers.
2097 SDValue ARMTargetLowering::LowerCallResult(
2098  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2099  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2100  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2101  SDValue ThisVal) const {
2102  // Assign locations to each value returned by this call.
2104  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2105  *DAG.getContext());
2106  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2107 
2108  // Copy all of the result registers out of their specified physreg.
2109  for (unsigned i = 0; i != RVLocs.size(); ++i) {
2110  CCValAssign VA = RVLocs[i];
2111 
2112  // Pass 'this' value directly from the argument to return value, to avoid
2113  // reg unit interference
2114  if (i == 0 && isThisReturn) {
2115  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2116  "unexpected return calling convention register assignment");
2117  InVals.push_back(ThisVal);
2118  continue;
2119  }
2120 
2121  SDValue Val;
2122  if (VA.needsCustom() &&
2123  (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2124  // Handle f64 or half of a v2f64.
2125  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2126  InFlag);
2127  Chain = Lo.getValue(1);
2128  InFlag = Lo.getValue(2);
2129  VA = RVLocs[++i]; // skip ahead to next loc
2130  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2131  InFlag);
2132  Chain = Hi.getValue(1);
2133  InFlag = Hi.getValue(2);
2134  if (!Subtarget->isLittle())
2135  std::swap (Lo, Hi);
2136  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2137 
2138  if (VA.getLocVT() == MVT::v2f64) {
2139  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2140  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2141  DAG.getConstant(0, dl, MVT::i32));
2142 
2143  VA = RVLocs[++i]; // skip ahead to next loc
2144  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2145  Chain = Lo.getValue(1);
2146  InFlag = Lo.getValue(2);
2147  VA = RVLocs[++i]; // skip ahead to next loc
2148  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2149  Chain = Hi.getValue(1);
2150  InFlag = Hi.getValue(2);
2151  if (!Subtarget->isLittle())
2152  std::swap (Lo, Hi);
2153  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2154  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2155  DAG.getConstant(1, dl, MVT::i32));
2156  }
2157  } else {
2158  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2159  InFlag);
2160  Chain = Val.getValue(1);
2161  InFlag = Val.getValue(2);
2162  }
2163 
2164  switch (VA.getLocInfo()) {
2165  default: llvm_unreachable("Unknown loc info!");
2166  case CCValAssign::Full: break;
2167  case CCValAssign::BCvt:
2168  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2169  break;
2170  }
2171 
2172  // f16 arguments have their size extended to 4 bytes and passed as if they
2173  // had been copied to the LSBs of a 32-bit register.
2174  // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2175  if (VA.needsCustom() &&
2176  (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2177  Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2178 
2179  InVals.push_back(Val);
2180  }
2181 
2182  return Chain;
2183 }
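// Example (illustrative) of the custom f64 path above: an f64 call result
// assigned to the register pair {R0, R1} is read back with two CopyFromReg
// nodes and recombined via ARMISD::VMOVDRR, with R0 as the low half and R1 as
// the high half on little-endian targets (the halves are swapped on
// big-endian).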
2184 
2185 /// LowerMemOpCallTo - Store the argument to the stack.
2186 SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
2187  SDValue Arg, const SDLoc &dl,
2188  SelectionDAG &DAG,
2189  const CCValAssign &VA,
2190  ISD::ArgFlagsTy Flags) const {
2191  unsigned LocMemOffset = VA.getLocMemOffset();
2192  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
2193  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2194  StackPtr, PtrOff);
2195  return DAG.getStore(
2196  Chain, dl, Arg, PtrOff,
2197  MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
2198 }
2199 
2200 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2201  SDValue Chain, SDValue &Arg,
2202  RegsToPassVector &RegsToPass,
2203  CCValAssign &VA, CCValAssign &NextVA,
2204  SDValue &StackPtr,
2205  SmallVectorImpl<SDValue> &MemOpChains,
2206  ISD::ArgFlagsTy Flags) const {
2207  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2208  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2209  unsigned id = Subtarget->isLittle() ? 0 : 1;
2210  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2211 
2212  if (NextVA.isRegLoc())
2213  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2214  else {
2215  assert(NextVA.isMemLoc());
2216  if (!StackPtr.getNode())
2217  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2218  getPointerTy(DAG.getDataLayout()));
2219 
2220  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
2221  dl, DAG, NextVA,
2222  Flags));
2223  }
2224 }
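// Example (illustrative): an outgoing f64 argument assigned to {R2, R3} is
// split with ARMISD::VMOVRRD into two i32 halves that are both appended to
// RegsToPass; if only one GPR remains, the half that no longer fits in a
// register is stored to its stack slot through LowerMemOpCallTo instead.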
2225 
2226 /// LowerCall - Lowering a call into a callseq_start <-
2227 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
2228 /// nodes.
2229 SDValue
2230 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2231  SmallVectorImpl<SDValue> &InVals) const {
2232  SelectionDAG &DAG = CLI.DAG;
2233  SDLoc &dl = CLI.DL;
2234  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2235  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2236  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2237  SDValue Chain = CLI.Chain;
2238  SDValue Callee = CLI.Callee;
2239  bool &isTailCall = CLI.IsTailCall;
2240  CallingConv::ID CallConv = CLI.CallConv;
2241  bool doesNotRet = CLI.DoesNotReturn;
2242  bool isVarArg = CLI.IsVarArg;
2243 
2244  MachineFunction &MF = DAG.getMachineFunction();
2245  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2246  MachineFunction::CallSiteInfo CSInfo;
2247  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2248  bool isThisReturn = false;
2249  bool isCmseNSCall = false;
2250  bool PreferIndirect = false;
2251 
2252  // Determine whether this is a non-secure function call.
2253  if (CLI.CB && CLI.CB->getAttributes().hasFnAttribute("cmse_nonsecure_call"))
2254  isCmseNSCall = true;
2255 
2256  // Disable tail calls if they're not supported.
2257  if (!Subtarget->supportsTailCall())
2258  isTailCall = false;
2259 
2260  // For both the non-secure calls and the returns from a CMSE entry function,
2261  // the function needs to do some extra work after the call, or before the
2262  // return, respectively; thus it cannot end with a tail call.
2263  if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2264  isTailCall = false;
2265 
2266  if (isa<GlobalAddressSDNode>(Callee)) {
2267  // If we're optimizing for minimum size and the function is called three or
2268  // more times in this block, we can improve codesize by calling indirectly
2269  // as BLXr has a 16-bit encoding.
2270  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2271  if (CLI.CB) {
2272  auto *BB = CLI.CB->getParent();
2273  PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2274  count_if(GV->users(), [&BB](const User *U) {
2275  return isa<Instruction>(U) &&
2276  cast<Instruction>(U)->getParent() == BB;
2277  }) > 2;
2278  }
2279  }
2280  if (isTailCall) {
2281  // Check if it's really possible to do a tail call.
2282  isTailCall = IsEligibleForTailCallOptimization(
2283  Callee, CallConv, isVarArg, isStructRet,
2284  MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2285  PreferIndirect);
2286  if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2287  report_fatal_error("failed to perform tail call elimination on a call "
2288  "site marked musttail");
2289  // We don't support GuaranteedTailCallOpt for ARM, only automatically
2290  // detected sibcalls.
2291  if (isTailCall)
2292  ++NumTailCalls;
2293  }
2294 
2295  // Analyze operands of the call, assigning locations to each operand.
2297  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2298  *DAG.getContext());
2299  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2300 
2301  // Get a count of how many bytes are to be pushed on the stack.
2302  unsigned NumBytes = CCInfo.getNextStackOffset();
2303 
2304  if (isTailCall) {
2305  // For tail calls, memory operands are available in our caller's stack.
2306  NumBytes = 0;
2307  } else {
2308  // Adjust the stack pointer for the new arguments...
2309  // These operations are automatically eliminated by the prolog/epilog pass
2310  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
2311  }
2312 
2313  SDValue StackPtr =
2314  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2315 
2316  RegsToPassVector RegsToPass;
2317  SmallVector<SDValue, 8> MemOpChains;
2318 
2319  // Walk the register/memloc assignments, inserting copies/loads. In the case
2320  // of tail call optimization, arguments are handled later.
2321  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2322  i != e;
2323  ++i, ++realArgIdx) {
2324  CCValAssign &VA = ArgLocs[i];
2325  SDValue Arg = OutVals[realArgIdx];
2326  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2327  bool isByVal = Flags.isByVal();
2328 
2329  // Promote the value if needed.
2330  switch (VA.getLocInfo()) {
2331  default: llvm_unreachable("Unknown loc info!");
2332  case CCValAssign::Full: break;
2333  case CCValAssign::SExt:
2334  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2335  break;
2336  case CCValAssign::ZExt:
2337  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2338  break;
2339  case CCValAssign::AExt:
2340  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2341  break;
2342  case CCValAssign::BCvt:
2343  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2344  break;
2345  }
2346 
2347  // f16 arguments have their size extended to 4 bytes and passed as if they
2348  // had been copied to the LSBs of a 32-bit register.
2349  // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2350  if (VA.needsCustom() &&
2351  (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2352  Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2353  } else {
2354  // f16 arguments could have been extended prior to argument lowering.
2355  // Mask such arguments if this is a CMSE nonsecure call.
2356  auto ArgVT = Outs[realArgIdx].ArgVT;
2357  if (isCmseNSCall && (ArgVT == MVT::f16)) {
2358  auto LocBits = VA.getLocVT().getSizeInBits();
2359  auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2360  SDValue Mask =
2361  DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2362  Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2363  Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2364  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2365  }
2366  }
2367 
2368  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2369  if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2371  DAG.getConstant(0, dl, MVT::i32));
2373  DAG.getConstant(1, dl, MVT::i32));
2374 
2375  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2376  StackPtr, MemOpChains, Flags);
2377 
2378  VA = ArgLocs[++i]; // skip ahead to next loc
2379  if (VA.isRegLoc()) {
2380  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2381  StackPtr, MemOpChains, Flags);
2382  } else {
2383  assert(VA.isMemLoc());
2384 
2385  MemOpChains.push_back(
2386  LowerMemOpCallTo(Chain, StackPtr, Op1, dl, DAG, VA, Flags));
2387  }
2388  } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2389  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2390  StackPtr, MemOpChains, Flags);
2391  } else if (VA.isRegLoc()) {
2392  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2393  Outs[0].VT == MVT::i32) {
2394  assert(VA.getLocVT() == MVT::i32 &&
2395  "unexpected calling convention register assignment");
2396  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2397  "unexpected use of 'returned'");
2398  isThisReturn = true;
2399  }
2400  const TargetOptions &Options = DAG.getTarget().Options;
2401  if (Options.EmitCallSiteInfo)
2402  CSInfo.emplace_back(VA.getLocReg(), i);
2403  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2404  } else if (isByVal) {
2405  assert(VA.isMemLoc());
2406  unsigned offset = 0;
2407 
2408  // True if this byval aggregate will be split between registers
2409  // and memory.
2410  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2411  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2412 
2413  if (CurByValIdx < ByValArgsCount) {
2414 
2415  unsigned RegBegin, RegEnd;
2416  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2417 
2418  EVT PtrVT =
2420  unsigned int i, j;
2421  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2422  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2423  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2424  SDValue Load =
2425  DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2426  DAG.InferPtrAlign(AddArg));
2427  MemOpChains.push_back(Load.getValue(1));
2428  RegsToPass.push_back(std::make_pair(j, Load));
2429  }
2430 
2431  // If the parameter size extends beyond the register area, the "offset" value
2432  // helps us to calculate the stack slot for the remaining part properly.
2433  offset = RegEnd - RegBegin;
2434 
2435  CCInfo.nextInRegsParam();
2436  }
2437 
2438  if (Flags.getByValSize() > 4*offset) {
2439  auto PtrVT = getPointerTy(DAG.getDataLayout());
2440  unsigned LocMemOffset = VA.getLocMemOffset();
2441  SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
2442  SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
2443  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2444  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2445  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2446  MVT::i32);
2447  SDValue AlignNode =
2448  DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2449 
2450  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2451  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2452  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2453  Ops));
2454  }
2455  } else if (!isTailCall) {
2456  assert(VA.isMemLoc());
2457 
2458  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2459  dl, DAG, VA, Flags));
2460  }
2461  }
2462 
2463  if (!MemOpChains.empty())
2464  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2465 
2466  // Build a sequence of copy-to-reg nodes chained together with token chain
2467  // and flag operands which copy the outgoing args into the appropriate regs.
2468  SDValue InFlag;
2469  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2470  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2471  RegsToPass[i].second, InFlag);
2472  InFlag = Chain.getValue(1);
2473  }
2474 
2475  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2476  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2477  // node so that legalize doesn't hack it.
2478  bool isDirect = false;
2479 
2480  const TargetMachine &TM = getTargetMachine();
2481  const Module *Mod = MF.getFunction().getParent();
2482  const GlobalValue *GV = nullptr;
2483  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2484  GV = G->getGlobal();
2485  bool isStub =
2486  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2487 
2488  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2489  bool isLocalARMFunc = false;
2490  auto PtrVt = getPointerTy(DAG.getDataLayout());
2491 
2492  if (Subtarget->genLongCalls()) {
2493  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2494  "long-calls codegen is not position independent!");
2495  // Handle a global address or an external symbol. If it's not one of
2496  // those, the target's already in a register, so we don't need to do
2497  // anything extra.
2498  if (isa<GlobalAddressSDNode>(Callee)) {
2499  // Create a constant pool entry for the callee address
2500  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2501  ARMConstantPoolValue *CPV =
2502  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2503 
2504  // Get the address of the callee into a register
2505  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2506  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2507  Callee = DAG.getLoad(
2508  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2510  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2511  const char *Sym = S->getSymbol();
2512 
2513  // Create a constant pool entry for the callee address
2514  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2515  ARMConstantPoolValue *CPV =
2517  ARMPCLabelIndex, 0);
2518  // Get the address of the callee into a register
2519  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2520  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2521  Callee = DAG.getLoad(
2522  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2524  }
2525  } else if (isa<GlobalAddressSDNode>(Callee)) {
2526  if (!PreferIndirect) {
2527  isDirect = true;
2528  bool isDef = GV->isStrongDefinitionForLinker();
2529 
2530  // ARM call to a local ARM function is predicable.
2531  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2532  // tBX takes a register source operand.
2533  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2534  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2535  Callee = DAG.getNode(
2536  ARMISD::WrapperPIC, dl, PtrVt,
2537  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2538  Callee = DAG.getLoad(
2539  PtrVt, dl, DAG.getEntryNode(), Callee,
2543  } else if (Subtarget->isTargetCOFF()) {
2544  assert(Subtarget->isTargetWindows() &&
2545  "Windows is the only supported COFF target");
2546  unsigned TargetFlags = ARMII::MO_NO_FLAG;
2547  if (GV->hasDLLImportStorageClass())
2548  TargetFlags = ARMII::MO_DLLIMPORT;
2549  else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
2550  TargetFlags = ARMII::MO_COFFSTUB;
2551  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
2552  TargetFlags);
2553  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2554  Callee =
2555  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2556  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2558  } else {
2559  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2560  }
2561  }
2562  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2563  isDirect = true;
2564  // tBX takes a register source operand.
2565  const char *Sym = S->getSymbol();
2566  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2567  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2568  ARMConstantPoolValue *CPV =
2570  ARMPCLabelIndex, 4);
2571  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2572  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2573  Callee = DAG.getLoad(
2574  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2576  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2577  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2578  } else {
2579  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2580  }
2581  }
2582 
2583  if (isCmseNSCall) {
2584  assert(!isARMFunc && !isDirect &&
2585  "Cannot handle call to ARM function or direct call");
2586  if (NumBytes > 0) {
2588  "call to non-secure function would "
2589  "require passing arguments on stack",
2590  dl.getDebugLoc());
2591  DAG.getContext()->diagnose(Diag);
2592  }
2593  if (isStructRet) {
2596  "call to non-secure function would return value through pointer",
2597  dl.getDebugLoc());
2598  DAG.getContext()->diagnose(Diag);
2599  }
2600  }
2601 
2602  // FIXME: handle tail calls differently.
2603  unsigned CallOpc;
2604  if (Subtarget->isThumb()) {
2605  if (isCmseNSCall)
2606  CallOpc = ARMISD::tSECALL;
2607  else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2608  CallOpc = ARMISD::CALL_NOLINK;
2609  else
2610  CallOpc = ARMISD::CALL;
2611  } else {
2612  if (!isDirect && !Subtarget->hasV5TOps())
2613  CallOpc = ARMISD::CALL_NOLINK;
2614  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2615  // Emit regular call when code size is the priority
2616  !Subtarget->hasMinSize())
2617  // "mov lr, pc; b _foo" to avoid confusing the return stack predictor (RSP)
2618  CallOpc = ARMISD::CALL_NOLINK;
2619  else
2620  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2621  }
2622 
2623  std::vector<SDValue> Ops;
2624  Ops.push_back(Chain);
2625  Ops.push_back(Callee);
2626 
2627  // Add argument registers to the end of the list so that they are known live
2628  // into the call.
2629  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2630  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2631  RegsToPass[i].second.getValueType()));
2632 
2633  // Add a register mask operand representing the call-preserved registers.
2634  if (!isTailCall) {
2635  const uint32_t *Mask;
2636  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2637  if (isThisReturn) {
2638  // For 'this' returns, use the R0-preserving mask if applicable
2639  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2640  if (!Mask) {
2641  // Set isThisReturn to false if the calling convention is not one that
2642  // allows 'returned' to be modeled in this way, so LowerCallResult does
2643  // not try to pass 'this' straight through
2644  isThisReturn = false;
2645  Mask = ARI->getCallPreservedMask(MF, CallConv);
2646  }
2647  } else
2648  Mask = ARI->getCallPreservedMask(MF, CallConv);
2649 
2650  assert(Mask && "Missing call preserved mask for calling convention");
2651  Ops.push_back(DAG.getRegisterMask(Mask));
2652  }
2653 
2654  if (InFlag.getNode())
2655  Ops.push_back(InFlag);
2656 
2657  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2658  if (isTailCall) {
2660  SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2661  DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2662  return Ret;
2663  }
2664 
2665  // Returns a chain and a flag for retval copy to use.
2666  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2667  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2668  InFlag = Chain.getValue(1);
2669  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2670 
2671  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2672  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2673  if (!Ins.empty())
2674  InFlag = Chain.getValue(1);
2675 
2676  // Handle result values, copying them out of physregs into vregs that we
2677  // return.
2678  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2679  InVals, isThisReturn,
2680  isThisReturn ? OutVals[0] : SDValue());
2681 }
2682 
2683 /// HandleByVal - Every parameter *after* a byval parameter is passed
2684 /// on the stack. Remember the next parameter register to allocate,
2685 /// and then confiscate the rest of the parameter registers to ensure
2686 /// this.
2687 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2688  Align Alignment) const {
2689  // Byval (as with any stack) slots are always at least 4 byte aligned.
2690  Alignment = std::max(Alignment, Align(4));
2691 
2692  unsigned Reg = State->AllocateReg(GPRArgRegs);
2693  if (!Reg)
2694  return;
2695 
2696  unsigned AlignInRegs = Alignment.value() / 4;
2697  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2698  for (unsigned i = 0; i < Waste; ++i)
2699  Reg = State->AllocateReg(GPRArgRegs);
2700 
2701  if (!Reg)
2702  return;
2703 
2704  unsigned Excess = 4 * (ARM::R4 - Reg);
2705 
2706  // Special case when NSAA != SP and the parameter size is greater than the
2707  // size of all remaining GPR regs. In that case we can't split the parameter,
2708  // we must send it to the stack. We also must set NCRN to R4, so waste all
2709  // remaining registers.
2710  const unsigned NSAAOffset = State->getNextStackOffset();
2711  if (NSAAOffset != 0 && Size > Excess) {
2712  while (State->AllocateReg(GPRArgRegs))
2713  ;
2714  return;
2715  }
2716 
2717  // The first register for the byval parameter is the first register that
2718  // wasn't allocated before this method call, i.e. "reg".
2719  // If the parameter is small enough to be saved in the range [reg, r4), then
2720  // the end (one past the last) register would be reg + param-size-in-regs;
2721  // otherwise the parameter would be split between registers and stack, and
2722  // the end register would be r4 in that case.
2723  unsigned ByValRegBegin = Reg;
2724  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2725  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2726  // Note, the first register was already allocated at the beginning of this
2727  // function; allocate the remaining registers we need.
2728  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2729  State->AllocateReg(GPRArgRegs);
2730  // A byval parameter that is split between registers and memory needs its
2731  // size truncated here.
2732  // In the case where the entire structure fits in registers, we set the
2733  // size in memory to zero.
2734  Size = std::max<int>(Size - Excess, 0);
2735 }
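// Worked example (illustrative; values are hypothetical and assume nothing has
// been assigned to the stack yet, i.e. NSAAOffset == 0): a 16-byte byval with
// 8-byte alignment arriving when R1 is the next free GPR gives AlignInRegs = 2
// and Waste = 1, so R1 is burned and the argument starts at R2. Excess =
// 4 * (R4 - R2) = 8 bytes fit in registers, ByValRegEnd = min(R2 + 4, R4) = R4
// (R3 gets allocated too), and Size is reduced to 16 - 8 = 8 bytes that still
// live on the stack.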
2736 
2737 /// MatchingStackOffset - Return true if the given stack call argument is
2738 /// already available in the same position (relatively) of the caller's
2739 /// incoming argument stack.
2740 static
2741 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2742  MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2743  const TargetInstrInfo *TII) {
2744  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2745  int FI = std::numeric_limits<int>::max();
2746  if (Arg.getOpcode() == ISD::CopyFromReg) {
2747  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2748  if (!Register::isVirtualRegister(VR))
2749  return false;
2750  MachineInstr *Def = MRI->getVRegDef(VR);
2751  if (!Def)
2752  return false;
2753  if (!Flags.isByVal()) {
2754  if (!TII->isLoadFromStackSlot(*Def, FI))
2755  return false;
2756  } else {
2757  return false;
2758  }
2759  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2760  if (Flags.isByVal())
2761  // ByVal argument is passed in as a pointer but it's now being
2762  // dereferenced. e.g.
2763  // define @foo(%struct.X* %A) {
2764  // tail call @bar(%struct.X* byval %A)
2765  // }
2766  return false;
2767  SDValue Ptr = Ld->getBasePtr();
2768  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2769  if (!FINode)
2770  return false;
2771  FI = FINode->getIndex();
2772  } else
2773  return false;
2774 
2776  if (!MFI.isFixedObjectIndex(FI))
2777  return false;
2778  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2779 }
2780 
2781 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2782 /// for tail call optimization. Targets which want to do tail call
2783 /// optimization should implement this function.
2784 bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2785  SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2786  bool isCalleeStructRet, bool isCallerStructRet,
2787  const SmallVectorImpl<ISD::OutputArg> &Outs,
2788  const SmallVectorImpl<SDValue> &OutVals,
2790  const bool isIndirect) const {
2791  MachineFunction &MF = DAG.getMachineFunction();
2792  const Function &CallerF = MF.getFunction();
2793  CallingConv::ID CallerCC = CallerF.getCallingConv();
2794 
2795  assert(Subtarget->supportsTailCall());
2796 
2797  // Indirect tail calls cannot be optimized for Thumb1 if the args
2798  // to the call take up r0-r3. The reason is that there are no legal registers
2799  // left to hold the pointer to the function to be called.
2800  if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2801  (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
2802  return false;
2803 
2804  // Look for obvious safe cases to perform tail call optimization that do not
2805  // require ABI changes. This is what gcc calls sibcall.
2806 
2807  // Exception-handling functions need a special set of instructions to indicate
2808  // a return to the hardware. Tail-calling another function would probably
2809  // break this.
2810  if (CallerF.hasFnAttribute("interrupt"))
2811  return false;
2812 
2813  // Also avoid sibcall optimization if either caller or callee uses struct
2814  // return semantics.
2815  if (isCalleeStructRet || isCallerStructRet)
2816  return false;
2817 
2818  // Externally-defined functions with weak linkage should not be
2819  // tail-called on ARM when the OS does not support dynamic
2820  // pre-emption of symbols, as the AAELF spec requires normal calls
2821  // to undefined weak functions to be replaced with a NOP or jump to the
2822  // next instruction. The behaviour of branch instructions in this
2823  // situation (as used for tail calls) is implementation-defined, so we
2824  // cannot rely on the linker replacing the tail call with a return.
2825  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2826  const GlobalValue *GV = G->getGlobal();
2828  if (GV->hasExternalWeakLinkage() &&
2829  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2830  return false;
2831  }
2832 
2833  // Check that the call results are passed in the same way.
2834  LLVMContext &C = *DAG.getContext();
2836  getEffectiveCallingConv(CalleeCC, isVarArg),
2837  getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
2838  CCAssignFnForReturn(CalleeCC, isVarArg),
2839  CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
2840  return false;
2841  // The callee has to preserve all registers the caller needs to preserve.
2842  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2843  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2844  if (CalleeCC != CallerCC) {
2845  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2846  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2847  return false;
2848  }
2849 
2850  // If Caller's vararg or byval argument has been split between registers and
2851  // stack, do not perform tail call, since part of the argument is in caller's
2852  // local frame.
2853  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2854  if (AFI_Caller->getArgRegsSaveSize())
2855  return false;
2856 
2857  // If the callee takes no arguments then go on to check the results of the
2858  // call.
2859  if (!Outs.empty()) {
2860  // Check if stack adjustment is needed. For now, do not do this if any
2861  // argument is passed on the stack.
2863  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2864  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2865  if (CCInfo.getNextStackOffset()) {
2866  // Check if the arguments are already laid out in the right way as
2867  // the caller's fixed stack objects.
2868  MachineFrameInfo &MFI = MF.getFrameInfo();
2869  const MachineRegisterInfo *MRI = &MF.getRegInfo();
2870  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2871  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2872  i != e;
2873  ++i, ++realArgIdx) {
2874  CCValAssign &VA = ArgLocs[i];
2875  EVT RegVT = VA.getLocVT();
2876  SDValue Arg = OutVals[realArgIdx];
2877  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2878  if (VA.getLocInfo() == CCValAssign::Indirect)
2879  return false;
2880  if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
2881  // f64 and vector types are split into multiple registers or
2882  // register/stack-slot combinations. The types will not match
2883  // the registers; give up on memory f64 refs until we figure
2884  // out what to do about this.
2885  if (!VA.isRegLoc())
2886  return false;
2887  if (!ArgLocs[++i].isRegLoc())
2888  return false;
2889  if (RegVT == MVT::v2f64) {
2890  if (!ArgLocs[++i].isRegLoc())
2891  return false;
2892  if (!ArgLocs[++i].isRegLoc())
2893  return false;
2894  }
2895  } else if (!VA.isRegLoc()) {
2896  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2897  MFI, MRI, TII))
2898  return false;
2899  }
2900  }
2901  }
2902 
2903  const MachineRegisterInfo &MRI = MF.getRegInfo();
2904  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2905  return false;
2906  }
2907 
2908  return true;
2909 }
2910 
2911 bool
2912 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2913  MachineFunction &MF, bool isVarArg,
2914  const SmallVectorImpl<ISD::OutputArg> &Outs,
2915  LLVMContext &Context) const {
2917  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2918  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2919 }
2920 
2921 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2922  const SDLoc &DL, SelectionDAG &DAG) {
2923  const MachineFunction &MF = DAG.getMachineFunction();
2924  const Function &F = MF.getFunction();
2925 
2926  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
2927 
2928  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2929  // version of the "preferred return address". These offsets affect the return
2930  // instruction if this is a return from PL1 without hypervisor extensions.
2931  // IRQ/FIQ: +4 "subs pc, lr, #4"
2932  // SWI: 0 "subs pc, lr, #0"
2933  // ABORT: +4 "subs pc, lr, #4"
2934  // UNDEF: +4/+2 "subs pc, lr, #0"
2935  // UNDEF varies depending on whether the exception came from ARM or Thumb
2936  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2937 
2938  int64_t LROffset;
2939  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2940  IntKind == "ABORT")
2941  LROffset = 4;
2942  else if (IntKind == "SWI" || IntKind == "UNDEF")
2943  LROffset = 0;
2944  else
2945  report_fatal_error("Unsupported interrupt attribute. If present, value "
2946  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2947 
2948  RetOps.insert(RetOps.begin() + 1,
2949  DAG.getConstant(LROffset, DL, MVT::i32, false));
2950 
2951  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2952 }
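// Example (illustrative): a handler carrying the IR attribute
// "interrupt"="IRQ" gets LROffset = 4, so the return goes through
// ARMISD::INTRET_FLAG and is eventually emitted as "subs pc, lr, #4", undoing
// the offset the core applied to LR on exception entry.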
2953 
2954 SDValue
2955 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2956  bool isVarArg,
2957  const SmallVectorImpl<ISD::OutputArg> &Outs,
2958  const SmallVectorImpl<SDValue> &OutVals,
2959  const SDLoc &dl, SelectionDAG &DAG) const {
2960  // CCValAssign - represent the assignment of the return value to a location.
2962 
2963  // CCState - Info about the registers and stack slots.
2964  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2965  *DAG.getContext());
2966 
2967  // Analyze outgoing return values.
2968  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2969 
2970  SDValue Flag;
2971  SmallVector<SDValue, 4> RetOps;
2972  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2973  bool isLittleEndian = Subtarget->isLittle();
2974 
2975  MachineFunction &MF = DAG.getMachineFunction();
2977  AFI->setReturnRegsCount(RVLocs.size());
2978 
2979  // Report error if cmse entry function returns structure through first ptr arg.
2980  if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
2981  // Note: using an empty SDLoc(), as the first line of the function is a
2982  // better place to report than the last line.
2985  "secure entry function would return value through pointer",
2986  SDLoc().getDebugLoc());
2987  DAG.getContext()->diagnose(Diag);
2988  }
2989 
2990  // Copy the result values into the output registers.
2991  for (unsigned i = 0, realRVLocIdx = 0;
2992  i != RVLocs.size();
2993  ++i, ++realRVLocIdx) {
2994  CCValAssign &VA = RVLocs[i];
2995  assert(VA.isRegLoc() && "Can only return in registers!");
2996 
2997  SDValue Arg = OutVals[realRVLocIdx];
2998  bool ReturnF16 = false;
2999 
3000  if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3001  // Half-precision return values can be returned like this:
3002  //
3003  // t11 f16 = fadd ...
3004  // t12: i16 = bitcast t11
3005  // t13: i32 = zero_extend t12
3006  // t14: f32 = bitcast t13 <~~~~~~~ Arg
3007  //
3008  // to avoid code generation for bitcasts, we simply set Arg to the node
3009  // that produces the f16 value, t11 in this case.
3010  //
3011  if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3012  SDValue ZE = Arg.getOperand(0);
3013  if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3014  SDValue BC = ZE.getOperand(0);
3015  if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3016  Arg = BC.getOperand(0);
3017  ReturnF16 = true;
3018  }
3019  }
3020  }
3021  }
3022 
3023  switch (VA.getLocInfo()) {
3024  default: llvm_unreachable("Unknown loc info!");
3025  case CCValAssign::Full: break;
3026  case CCValAssign::BCvt:
3027  if (!ReturnF16)
3028  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3029  break;
3030  }
3031 
3032  // Mask f16 arguments if this is a CMSE nonsecure entry.
3033  auto RetVT = Outs[realRVLocIdx].ArgVT;
3034  if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3035  if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3036  Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3037  } else {
3038  auto LocBits = VA.getLocVT().getSizeInBits();
3039  auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3040  SDValue Mask =
3041  DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3042  Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3043  Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3044  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3045  }
3046  }
3047 
3048  if (VA.needsCustom() &&
3049  (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3050  if (VA.getLocVT() == MVT::v2f64) {
3051  // Extract the first half and return it in two registers.
3053  DAG.getConstant(0, dl, MVT::i32));
3054  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3055  DAG.getVTList(MVT::i32, MVT::i32), Half);
3056 
3057  Chain =
3058  DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3059  HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag);
3060  Flag = Chain.getValue(1);
3061  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3062  VA = RVLocs[++i]; // skip ahead to next loc
3063  Chain =
3064  DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3065  HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag);
3066  Flag = Chain.getValue(1);
3067  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3068  VA = RVLocs[++i]; // skip ahead to next loc
3069 
3070  // Extract the 2nd half and fall through to handle it as an f64 value.
3072  DAG.getConstant(1, dl, MVT::i32));
3073  }
3074  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3075  // available.
3076  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3077  DAG.getVTList(MVT::i32, MVT::i32), Arg);
3078  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3079  fmrrd.getValue(isLittleEndian ? 0 : 1), Flag);
3080  Flag = Chain.getValue(1);
3081  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3082  VA = RVLocs[++i]; // skip ahead to next loc
3083  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3084  fmrrd.getValue(isLittleEndian ? 1 : 0), Flag);
3085  } else
3086  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
3087 
3088  // Guarantee that all emitted copies are
3089  // glued together, so nothing can be scheduled between them.
3090  Flag = Chain.getValue(1);
3091  RetOps.push_back(DAG.getRegister(
3092  VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3093  }
3094  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3095  const MCPhysReg *I =
3096  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3097  if (I) {
3098  for (; *I; ++I) {
3099  if (ARM::GPRRegClass.contains(*I))
3100  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3101  else if (ARM::DPRRegClass.contains(*I))
3102  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3103  else
3104  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3105  }
3106  }
3107 
3108  // Update chain and glue.
3109  RetOps[0] = Chain;
3110  if (Flag.getNode())
3111  RetOps.push_back(Flag);
3112 
3113  // CPUs which aren't M-class use a special sequence to return from
3114  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3115  // though we use "subs pc, lr, #N").
3116  //
3117  // M-class CPUs actually use a normal return sequence with a special
3118  // (hardware-provided) value in LR, so the normal code path works.
3119  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3120  !Subtarget->isMClass()) {
3121  if (Subtarget->isThumb1Only())
3122  report_fatal_error("interrupt attribute is not supported in Thumb1");
3123  return LowerInterruptReturn(RetOps, dl, DAG);
3124  }
3125 
3128  return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3129 }
3130 
3131 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3132  if (N->getNumValues() != 1)
3133  return false;
3134  if (!N->hasNUsesOfValue(1, 0))
3135  return false;
3136 
3137  SDValue TCChain = Chain;
3138  SDNode *Copy = *N->use_begin();
3139  if (Copy->getOpcode() == ISD::CopyToReg) {
3140  // If the copy has a glue operand, we conservatively assume it isn't safe to
3141  // perform a tail call.
3142  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3143  return false;
3144  TCChain = Copy->getOperand(0);
3145  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3146  SDNode *VMov = Copy;
3147  // f64 returned in a pair of GPRs.
3149  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
3150  UI != UE; ++UI) {
3151  if (UI->getOpcode() != ISD::CopyToReg)
3152  return false;
3153  Copies.insert(*UI);
3154  }
3155  if (Copies.size() > 2)
3156  return false;
3157 
3158  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
3159  UI != UE; ++UI) {
3160  SDValue UseChain = UI->getOperand(0);
3161  if (Copies.count(UseChain.getNode()))
3162  // Second CopyToReg
3163  Copy = *UI;
3164  else {
3165  // We are at the top of this chain.
3166  // If the copy has a glue operand, we conservatively assume it
3167  // isn't safe to perform a tail call.
3168  if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
3169  return false;
3170  // First CopyToReg
3171  TCChain = UseChain;
3172  }
3173  }
3174  } else if (Copy->getOpcode() == ISD::BITCAST) {
3175  // f32 returned in a single GPR.
3176  if (!Copy->hasOneUse())
3177  return false;
3178  Copy = *Copy->use_begin();
3179  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3180  return false;
3181  // If the copy has a glue operand, we conservatively assume it isn't safe to
3182  // perform a tail call.
3183  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3184  return false;
3185  TCChain = Copy->getOperand(0);
3186  } else {
3187  return false;
3188  }
3189 
3190  bool HasRet = false;
3191  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
3192  UI != UE; ++UI) {
3193  if (UI->getOpcode() != ARMISD::RET_FLAG &&
3194  UI->getOpcode() != ARMISD::INTRET_FLAG)
3195  return false;
3196  HasRet = true;
3197  }
3198 
3199  if (!HasRet)
3200  return false;
3201 
3202  Chain = TCChain;
3203  return true;
3204 }
3205 
3206 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3207  if (!Subtarget->supportsTailCall())
3208  return false;
3209 
3210  if (!CI->isTailCall())
3211  return false;
3212 
3213  return true;
3214 }
3215 
3216 // We are writing a 64 bit value, so we need to split it into two 32 bit values
3217 // first, and pass the low and high parts through.
3218 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
3219  SDLoc DL(Op);
3220  SDValue WriteValue = Op->getOperand(2);
3221 
3222  // This function is only supposed to be called for i64 type argument.
3223  assert(WriteValue.getValueType() == MVT::i64
3224  && "LowerWRITE_REGISTER called for non-i64 type argument.");
3225 
3226  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3227  DAG.getConstant(0, DL, MVT::i32));
3228  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3229  DAG.getConstant(1, DL, MVT::i32));
3230  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3231  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3232 }
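// Illustrative trigger (a sketch; the register name in !0 is hypothetical):
//   !0 = !{!"some_64bit_sysreg"}
//   call void @llvm.write_register.i64(metadata !0, i64 %v)
// The i64 operand is split here with ISD::EXTRACT_ELEMENT 0/1 and re-emitted
// as a single ISD::WRITE_REGISTER node carrying the two i32 halves.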
3233 
3234 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3235 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3236 // one of the above mentioned nodes. It has to be wrapped because otherwise
3237 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3238 // be used to form addressing mode. These wrapped nodes will be selected
3239 // into MOVi.
3240 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3241  SelectionDAG &DAG) const {
3242  EVT PtrVT = Op.getValueType();
3243  // FIXME there is no actual debug info here
3244  SDLoc dl(Op);
3245  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3246  SDValue Res;
3247 
3248  // When generating execute-only code Constant Pools must be promoted to the
3249  // global data section. It's a bit ugly that we can't share them across basic
3250  // blocks, but this way we guarantee that execute-only behaves correctly with
3251  // position-independent addressing modes.
3252  if (Subtarget->genExecuteOnly()) {
3253  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3254  auto T = const_cast<Type*>(CP->getType());
3255  auto C = const_cast<Constant*>(CP->getConstVal());
3256  auto M = const_cast<Module*>(DAG.getMachineFunction().
3257  getFunction().getParent());
3258  auto GV = new GlobalVariable(
3259  *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3260  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
3261  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
3262  Twine(AFI->createPICLabelUId())
3263  );
3264  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3265  dl, PtrVT);
3266  return LowerGlobalAddress(GA, DAG);
3267  }
3268 
3269  if (CP->isMachineConstantPoolEntry())
3270  Res =
3271  DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3272  else
3273  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign());
3274  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3275 }
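// Example (illustrative) of the execute-only path above: when the subtarget is
// in execute-only mode, a constant that would normally live in this function's
// constant pool is instead promoted to an internal global named from the
// private prefix, "CP", the function number and a fresh PIC label UId (e.g.
// ".LCP0_1" on an ELF target), and the address is then lowered through
// LowerGlobalAddress like any other global.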
3276 
3279 }
3280 
3281 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3282  SelectionDAG &DAG) const {
3283  MachineFunction &MF = DAG.getMachineFunction();
3285  unsigned ARMPCLabelIndex = 0;
3286  SDLoc DL(Op);
3287  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3288  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3289  SDValue CPAddr;
3290  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3291  if (!IsPositionIndependent) {
3292  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3293  } else {
3294  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3295  ARMPCLabelIndex = AFI->createPICLabelUId();
3296  ARMConstantPoolValue *CPV =
3297  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3298  ARMCP::CPBlockAddress, PCAdj);
3299  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3300  }
3301  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3302  SDValue Result = DAG.getLoad(
3303  PtrVT, DL, DAG.getEntryNode(), CPAddr,
3304  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3305  if (!IsPositionIndependent)
3306  return Result;
3307  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3308  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3309 }
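// For the position-independent case above, the emitted sequence is roughly
// (sketch; exact opcodes depend on the subtarget):
//   ldr  rN, .LCPI_x        ; ARMCP::CPBlockAddress entry, biased by PCAdj
//   add  rN, pc, rN         ; ARMISD::PIC_ADD cancels the pc-relative bias
// The PCAdj of 4 (Thumb) or 8 (ARM) matches how far ahead of the instruction
// the pc reads in each mode.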
3310 
3311 /// Convert a TLS address reference into the correct sequence of loads
3312 /// and calls to compute the variable's address for Darwin, and return an
3313 /// SDValue containing the final node.
3314 
3315 /// Darwin only has one TLS scheme which must be capable of dealing with the
3316 /// fully general situation, in the worst case. This means:
3317 /// + "extern __thread" declaration.
3318 /// + Defined in a possibly unknown dynamic library.
3319 ///
3320 /// The general system is that each __thread variable has a [3 x i32] descriptor
3321 /// which contains information used by the runtime to calculate the address. The
3322 /// only part of this the compiler needs to know about is the first word, which
3323 /// contains a function pointer that must be called with the address of the
3324 /// entire descriptor in "r0".
3325 ///
3326 /// Since this descriptor may be in a different unit, in general access must
3327 /// proceed along the usual ARM rules. A common sequence to produce is:
3328 ///
3329 /// movw rT1, :lower16:_var$non_lazy_ptr
3330 /// movt rT1, :upper16:_var$non_lazy_ptr
3331 /// ldr r0, [rT1]
3332 /// ldr rT2, [r0]
3333 /// blx rT2
3334 /// [...address now in r0...]
3335 SDValue
3336 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3337  SelectionDAG &DAG) const {
3338  assert(Subtarget->isTargetDarwin() &&
3339  "This function expects a Darwin target");
3340  SDLoc DL(Op);
3341 
3342  // First step is to get the address of the actual global symbol. This is where
3343  // the TLS descriptor lives.
3344  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3345 
3346  // The first entry in the descriptor is a function pointer that we must call
3347  // to obtain the address of the variable.
3348  SDValue Chain = DAG.getEntryNode();
3349  SDValue FuncTLVGet = DAG.getLoad(
3350  MVT::i32, DL, Chain, DescAddr,
3351  MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4),
3352  MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
3353  MachineMemOperand::MOInvariant);
3354  Chain = FuncTLVGet.getValue(1);
3355 
3356  MachineFunction &F = DAG.getMachineFunction();
3357  MachineFrameInfo &MFI = F.getFrameInfo();
3358  MFI.setAdjustsStack(true);
3359 
3360  // TLS calls preserve all registers except those that absolutely must be
3361  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3362  // silly).
3363  auto TRI =
3364  getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3365  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3366  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3367 
3368  // Finally, we can make the call. This is just a degenerate version of a
3369  // normal ARM call node: r0 takes the address of the descriptor, and
3370  // returns the address of the variable in this thread.
3371  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3372  Chain =
3373  DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3374  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3375  DAG.getRegisterMask(Mask), Chain.getValue(1));
3376  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3377 }
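// For reference, the [3 x i32] descriptor mentioned above corresponds to the
// runtime's TLV descriptor, roughly (assumed layout, not defined in this file):
//   struct TLVDescriptor { void *(*thunk)(TLVDescriptor *); long key; long offset; };
// Only the thunk matters to the compiler: it is loaded and called with r0
// pointing at the descriptor, and it returns the variable's address in r0.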
3378 
3379 SDValue
3380 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3381  SelectionDAG &DAG) const {
3382  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3383 
3384  SDValue Chain = DAG.getEntryNode();
3385  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3386  SDLoc DL(Op);
3387 
3388  // Load the current TEB (thread environment block)
3389  SDValue Ops[] = {Chain,
3390  DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3391  DAG.getTargetConstant(15, DL, MVT::i32),
3392  DAG.getTargetConstant(0, DL, MVT::i32),
3393  DAG.getTargetConstant(13, DL, MVT::i32),
3394  DAG.getTargetConstant(0, DL, MVT::i32),
3395  DAG.getTargetConstant(2, DL, MVT::i32)};
3396  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3397  DAG.getVTList(MVT::i32, MVT::Other), Ops);
3398 
3399  SDValue TEB = CurrentTEB.getValue(0);
3400  Chain = CurrentTEB.getValue(1);
3401 
3402  // Load the ThreadLocalStoragePointer from the TEB
3403  // A pointer to the TLS array is located at offset 0x2c from the TEB.
3404  SDValue TLSArray =
3405  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3406  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3407 
3408  // The pointer to the thread's TLS data area is found at the offset
3409  // (TLS index * 4) into the TLS array.
3410 
3411  // Load the TLS index from the C runtime
3412  SDValue TLSIndex =
3413  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3414  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3415  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3416 
3417  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3418  DAG.getConstant(2, DL, MVT::i32));
3419  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3420  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3421  MachinePointerInfo());
3422 
3423  // Get the offset of the start of the .tls section (section base)
3424  const auto *GA = cast<GlobalAddressSDNode>(Op);
3425  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3426  SDValue Offset = DAG.getLoad(
3427  PtrVT, DL, Chain,
3428  DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3429  DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3430  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3431 
3432  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3433 }
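// Roughly the C equivalent of the sequence built above (sketch;
// secrel_offset_of stands for the SECREL value loaded from the constant pool):
//   char **tls_array = *(char ***)((char *)teb + 0x2c);
//   char  *tls_base  = tls_array[_tls_index];   // this module's TLS block
//   return tls_base + secrel_offset_of(var);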
3434 
3435 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
3436 SDValue
3437 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3438  SelectionDAG &DAG) const {
3439  SDLoc dl(GA);
3440  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3441  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3442  MachineFunction &MF = DAG.getMachineFunction();
3443  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3444  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3445  ARMConstantPoolValue *CPV =
3446  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3447  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3448  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3449  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3450  Argument = DAG.getLoad(
3451  PtrVT, dl, DAG.getEntryNode(), Argument,
3452  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3453  SDValue Chain = Argument.getValue(1);
3454 
3455  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3456  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3457 
3458  // call __tls_get_addr.
3459  ArgListTy Args;
3460  ArgListEntry Entry;
3461  Entry.Node = Argument;
3462  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3463  Args.push_back(Entry);
3464 
3465  // FIXME: is there useful debug info available here?
3466  TargetLowering::CallLoweringInfo CLI(DAG);
3467  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3468  CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3469  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3470 
3471  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3472  return CallResult.first;
3473 }
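// The net effect is roughly (sketch):
//   ldr  r0, .LCPI_x        ; TLSGD constant-pool entry for the variable
//   add  r0, pc, r0         ; ARMISD::PIC_ADD
//   bl   __tls_get_addr     ; returns the variable's address in r0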
3474 
3475 // Lower ISD::GlobalTLSAddress using the "initial exec" or
3476 // "local exec" model.
3477 SDValue
3478 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3479  SelectionDAG &DAG,
3480  TLSModel::Model model) const {
3481  const GlobalValue *GV = GA->getGlobal();
3482  SDLoc dl(GA);
3483  SDValue Offset;
3484  SDValue Chain = DAG.getEntryNode();
3485  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3486  // Get the Thread Pointer
3487  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3488 
3489  if (model == TLSModel::InitialExec) {
3490  MachineFunction &MF = DAG.getMachineFunction();
3491  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3492  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3493  // Initial exec model.
3494  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3495  ARMConstantPoolValue *CPV =
3496  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3497  ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3498  true);
3499  Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3500  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3501  Offset = DAG.getLoad(
3502  PtrVT, dl, Chain, Offset,
3503  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3504  Chain = Offset.getValue(1);
3505 
3506  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3507  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3508 
3509  Offset = DAG.getLoad(
3510  PtrVT, dl, Chain, Offset,
3511  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3512  } else {
3513  // local exec model
3514  assert(model == TLSModel::LocalExec);
3515  ARMConstantPoolValue *CPV =
3516  ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3517  Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3518  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3519  Offset = DAG.getLoad(
3520  PtrVT, dl, Chain, Offset,
3521  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3522  }
3523 
3524  // The address of the thread local variable is the add of the thread
3525  // pointer with the offset of the variable.
3526  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3527 }
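// Summary of the two models handled above (sketch): initial exec loads a
// GOT-relative entry (ARMCP::GOTTPOFF) from the constant pool and then loads
// through it to obtain the thread-pointer offset, whereas local exec loads the
// offset (ARMCP::TPOFF) directly as a link-time constant. Either way the final
// address is ThreadPointer + Offset, as returned above.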
3528 
3529 SDValue
3530 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3531  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3532  if (DAG.getTarget().useEmulatedTLS())
3533  return LowerToTLSEmulatedModel(GA, DAG);
3534 
3535  if (Subtarget->isTargetDarwin())
3536  return LowerGlobalTLSAddressDarwin(Op, DAG);
3537 
3538  if (Subtarget->isTargetWindows())
3539  return LowerGlobalTLSAddressWindows(Op, DAG);
3540 
3541  // TODO: implement the "local dynamic" model
3542  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3543  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3544 
3545  switch (model) {
3546  case TLSModel::GeneralDynamic:
3547  case TLSModel::LocalDynamic:
3548  return LowerToTLSGeneralDynamicModel(GA, DAG);
3549  case TLSModel::InitialExec:
3550  case TLSModel::LocalExec:
3551  return LowerToTLSExecModels(GA, DAG, model);
3552  }
3553  llvm_unreachable("bogus TLS model");
3554 }
3555 
3556 /// Return true if all users of V are within function F, looking through
3557 /// ConstantExprs.
3558 static bool allUsersAreInFunction(const Value *V, const Function *F) {
3559  SmallVector<const User*,4> Worklist(V->users());
3560  while (!Worklist.empty()) {
3561  auto *U = Worklist.pop_back_val();
3562  if (isa<ConstantExpr>(U)) {
3563  append_range(Worklist, U->users());
3564  continue;
3565  }
3566 
3567  auto *I = dyn_cast<Instruction>(U);
3568  if (!I || I->getParent()->getParent() != F)
3569  return false;
3570  }
3571  return true;
3572 }
3573 
3574 static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3575  const GlobalValue *GV, SelectionDAG &DAG,
3576  EVT PtrVT, const SDLoc &dl) {
3577  // If we're creating a pool entry for a constant global with unnamed address,
3578  // and the global is small enough, we can emit it inline into the constant pool
3579  // to save ourselves an indirection.
3580  //
3581  // This is a win if the constant is only used in one function (so it doesn't
3582  // need to be duplicated) or duplicating the constant wouldn't increase code
3583  // size (implying the constant is no larger than 4 bytes).
3584  const Function &F = DAG.getMachineFunction().getFunction();
3585 
3586  // We rely on this decision to inline being idempotent and unrelated to the
3587  // use-site. We know that if we inline a variable at one use site, we'll
3588  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3589  // doesn't know about this optimization, so bail out if it's enabled:
3590  // otherwise we could decide to inline here (and thus never emit the GV)
3591  // while fast-isel-generated code still requires the GV.
3592  if (!EnableConstpoolPromotion ||
3593  DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3594  return SDValue();
3595 
3596  auto *GVar = dyn_cast<GlobalVariable>(GV);
3597  if (!GVar || !GVar->hasInitializer() ||
3598  !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3599  !GVar->hasLocalLinkage())
3600  return SDValue();
3601 
3602  // If we inline a value that contains relocations, we move the relocations
3603  // from .data to .text. This is not allowed in position-independent code.
3604  auto *Init = GVar->getInitializer();
3605  if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3606  Init->needsDynamicRelocation())
3607  return SDValue();
3608 
3609  // The constant islands pass can only really deal with alignment requests
3610  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3611  // any type wanting greater alignment requirements than 4 bytes. We also
3612  // can only promote constants that are multiples of 4 bytes in size or
3613  // are paddable to a multiple of 4. Currently we only try to pad constants
3614  // that are strings, for simplicity.
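  // For example (sketch): a 6-byte string gets RequiredPadding == 2 and is
  // padded out to 8 bytes below, while a 6-byte non-string constant (or any
  // constant wanting alignment greater than 4) is rejected here.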
3615  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3616  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3617  Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3618  unsigned RequiredPadding = 4 - (Size % 4);
3619  bool PaddingPossible =
3620  RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3621  if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3622  Size == 0)
3623  return SDValue();
3624 
3625  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3626  MachineFunction &MF = DAG.getMachineFunction();
3627  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3628 
3629  // We can't bloat the constant pool too much, else the ConstantIslands pass
3630  // may fail to converge. If we haven't promoted this global yet (it may have
3631  // multiple uses), and promoting it would increase the constant pool size (Sz
3632  // > 4), ensure we have space to do so up to MaxTotal.
3633  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3634  if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3635  ConstpoolPromotionMaxTotal)
3636  return SDValue();
3637 
3638  // This is only valid if all users are in a single function; we can't clone
3639  // the constant in general. The LLVM IR unnamed_addr allows merging
3640  // constants, but not cloning them.
3641  //
3642  // We could potentially allow cloning if we could prove all uses of the
3643  // constant in the current function don't care about the address, like
3644  // printf format strings. But that isn't implemented for now.
3645  if (!allUsersAreInFunction(GVar, &F))
3646  return SDValue();
3647 
3648  // We're going to inline this global. Pad it out if needed.
3649  if (RequiredPadding != 4) {
3650  StringRef S = CDAInit->getAsString();
3651 
3652  SmallVector<uint8_t,16> V(S.size());
3653  std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3654  while (RequiredPadding--)
3655  V.push_back(0);
3656  Init = ConstantDataArray::get(*DAG.getContext(), V);
3657  }
3658 
3659  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3660  SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
3661  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3662  AFI->markGlobalAsPromotedToConstantPool(GVar);
3663  AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3664  PaddedSize - 4);
3665  }
3666  ++NumConstpoolPromoted;
3667  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3668 }
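// Illustrative effect (sketch): with constant promotion enabled, a small
// constant such as
//   @.str = private unnamed_addr constant [4 x i8] c"abc\00"
// whose users all sit in one function can be emitted as a constant-pool entry
// of that function instead of a separate data-section global, so the access
// becomes a single pc-relative literal-pool load.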
3669 
3670 static bool isReadOnly(const GlobalValue *GV) {
3671  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3672  if (!(GV = GA->getBaseObject()))
3673  return false;
3674  if (const auto *V = dyn_cast<GlobalVariable>(GV))
3675  return V->isConstant();
3676  return isa<Function>(GV);
3677 }
3678 
3679 SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3680  SelectionDAG &DAG) const {
3681  switch (Subtarget->getTargetTriple().getObjectFormat()) {
3682  default: llvm_unreachable("unknown object format");
3683  case Triple::COFF:
3684  return LowerGlobalAddressWindows(Op, DAG);
3685  case Triple::ELF:
3686  return LowerGlobalAddressELF(Op, DAG);
3687  case Triple::MachO:
3688  return LowerGlobalAddressDarwin(Op, DAG);
3689  }
3690 }
3691 
3692 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3693  SelectionDAG &DAG) const {
3694  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3695  SDLoc dl(Op);
3696  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3697  const TargetMachine &TM = getTargetMachine();
3698  bool IsRO = isReadOnly(GV);
3699 
3700  // promoteToConstantPool only if not generating XO text section
3701  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3702  if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3703  return V;
3704 
3705  if (isPositionIndependent()) {
3706  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3707  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3708  UseGOT_PREL ? ARMII::MO_GOT : 0);
3709  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3710  if (UseGOT_PREL)
3711  Result =
3712  DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3713  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3714  return Result;
3715  } else if (Subtarget->isROPI() && IsRO) {
3716  // PC-relative.
3717  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3718  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3719  return Result;
3720  } else if (Subtarget->isRWPI() && !IsRO) {
3721  // SB-relative.
3722  SDValue RelAddr;
3723  if (Subtarget->useMovt()) {
3724  ++NumMovwMovt;
3725  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3726  RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3727  } else { // use literal pool for address constant
3728  ARMConstantPoolValue *CPV =
3729  ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3730  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3731  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3732  RelAddr = DAG.getLoad(
3733  PtrVT, dl, DAG.getEntryNode(), CPAddr,