1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that ARM uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMISelLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMCallingConv.h"
18 #include "ARMConstantPoolValue.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMPerfectShuffle.h"
21 #include "ARMRegisterInfo.h"
22 #include "ARMSelectionDAGInfo.h"
23 #include "ARMSubtarget.h"
24 #include "ARMTargetTransformInfo.h"
27 #include "Utils/ARMBaseInfo.h"
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/BitVector.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallVector.h"
36 #include "llvm/ADT/Statistic.h"
37 #include "llvm/ADT/StringExtras.h"
38 #include "llvm/ADT/StringRef.h"
39 #include "llvm/ADT/StringSwitch.h"
40 #include "llvm/ADT/Triple.h"
41 #include "llvm/ADT/Twine.h"
66 #include "llvm/IR/Attributes.h"
67 #include "llvm/IR/CallingConv.h"
68 #include "llvm/IR/Constant.h"
69 #include "llvm/IR/Constants.h"
70 #include "llvm/IR/DataLayout.h"
71 #include "llvm/IR/DebugLoc.h"
72 #include "llvm/IR/DerivedTypes.h"
73 #include "llvm/IR/Function.h"
74 #include "llvm/IR/GlobalAlias.h"
75 #include "llvm/IR/GlobalValue.h"
76 #include "llvm/IR/GlobalVariable.h"
77 #include "llvm/IR/IRBuilder.h"
78 #include "llvm/IR/InlineAsm.h"
79 #include "llvm/IR/Instruction.h"
80 #include "llvm/IR/Instructions.h"
81 #include "llvm/IR/IntrinsicInst.h"
82 #include "llvm/IR/Intrinsics.h"
83 #include "llvm/IR/IntrinsicsARM.h"
84 #include "llvm/IR/Module.h"
85 #include "llvm/IR/PatternMatch.h"
86 #include "llvm/IR/Type.h"
87 #include "llvm/IR/User.h"
88 #include "llvm/IR/Value.h"
89 #include "llvm/MC/MCInstrDesc.h"
91 #include "llvm/MC/MCRegisterInfo.h"
92 #include "llvm/MC/MCSchedule.h"
95 #include "llvm/Support/Casting.h"
96 #include "llvm/Support/CodeGen.h"
98 #include "llvm/Support/Compiler.h"
99 #include "llvm/Support/Debug.h"
101 #include "llvm/Support/KnownBits.h"
103 #include "llvm/Support/MathExtras.h"
107 #include <algorithm>
108 #include <cassert>
109 #include <cstdint>
110 #include <cstdlib>
111 #include <iterator>
112 #include <limits>
113 #include <string>
114 #include <tuple>
115 #include <utility>
116 #include <vector>
117 
118 using namespace llvm;
119 using namespace llvm::PatternMatch;
120 
121 #define DEBUG_TYPE "arm-isel"
122 
123 STATISTIC(NumTailCalls, "Number of tail calls");
124 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
125 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
126 STATISTIC(NumConstpoolPromoted,
127  "Number of constants with their storage promoted into constant pools");
128 
129 static cl::opt<bool>
130 ARMInterworking("arm-interworking", cl::Hidden,
131  cl::desc("Enable / disable ARM interworking (for debugging only)"),
132  cl::init(true));
133 
135  "arm-promote-constant", cl::Hidden,
136  cl::desc("Enable / disable promotion of unnamed_addr constants into "
137  "constant pools"),
138  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
140  "arm-promote-constant-max-size", cl::Hidden,
141  cl::desc("Maximum size of constant to promote into a constant pool"),
142  cl::init(64));
144  "arm-promote-constant-max-total", cl::Hidden,
145  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
146  cl::init(128));
147 
149 MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
150  cl::desc("Maximum interleave factor for MVE VLDn to generate."),
151  cl::init(2));
152 
153 // The APCS parameter registers.
154 static const MCPhysReg GPRArgRegs[] = {
155  ARM::R0, ARM::R1, ARM::R2, ARM::R3
156 };
157 
158 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
159  if (VT != PromotedLdStVT) {
160  setOperationAction(ISD::LOAD, VT, Promote);
161  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
162 
163  setOperationAction(ISD::STORE, VT, Promote);
164  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
165  }
166 
167  MVT ElemTy = VT.getVectorElementType();
168  if (ElemTy != MVT::f64)
169  setOperationAction(ISD::SETCC, VT, Custom);
170  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
171  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
172  if (ElemTy == MVT::i32) {
173  setOperationAction(ISD::SINT_TO_FP, VT, Custom);
174  setOperationAction(ISD::UINT_TO_FP, VT, Custom);
175  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
176  setOperationAction(ISD::FP_TO_UINT, VT, Custom);
177  } else {
178  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
179  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
180  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
181  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
182  }
183  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
184  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
185  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
186  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
187  setOperationAction(ISD::SELECT, VT, Expand);
188  setOperationAction(ISD::SELECT_CC, VT, Expand);
189  setOperationAction(ISD::VSELECT, VT, Expand);
190  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
191  if (VT.isInteger()) {
192  setOperationAction(ISD::SHL, VT, Custom);
193  setOperationAction(ISD::SRA, VT, Custom);
194  setOperationAction(ISD::SRL, VT, Custom);
195  }
196 
197  // Neon does not support vector divide/remainder operations.
198  setOperationAction(ISD::SDIV, VT, Expand);
199  setOperationAction(ISD::UDIV, VT, Expand);
200  setOperationAction(ISD::FDIV, VT, Expand);
201  setOperationAction(ISD::SREM, VT, Expand);
202  setOperationAction(ISD::UREM, VT, Expand);
203  setOperationAction(ISD::FREM, VT, Expand);
204  setOperationAction(ISD::SDIVREM, VT, Expand);
205  setOperationAction(ISD::UDIVREM, VT, Expand);
206 
207  if (!VT.isFloatingPoint() &&
208  VT != MVT::v2i64 && VT != MVT::v1i64)
209  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
210  setOperationAction(Opcode, VT, Legal);
211  if (!VT.isFloatingPoint())
212  for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
213  setOperationAction(Opcode, VT, Legal);
214 }
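// A quick reference for the actions used above (standard SelectionDAG
// legalization semantics, not specific to this file): Legal keeps the node
// for instruction selection, Expand lets the legalizer rewrite it in terms of
// other nodes or a libcall, Custom routes it through
// ARMTargetLowering::LowerOperation, and Promote together with
// AddPromotedToType re-expresses the operation in the promoted type. For
// example, a load of a 64-bit NEON vector type other than f64 is performed as
// an f64 load and bitcast back to the requested type.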
215 
216 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
217  addRegisterClass(VT, &ARM::DPRRegClass);
218  addTypeForNEON(VT, MVT::f64);
219 }
220 
221 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
222  addRegisterClass(VT, &ARM::DPairRegClass);
223  addTypeForNEON(VT, MVT::v2f64);
224 }
225 
226 void ARMTargetLowering::setAllExpand(MVT VT) {
227  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
228  setOperationAction(Opc, VT, Expand);
229 
230  // We support these really simple operations even on types where all
231  // the actual arithmetic has to be broken down into simpler
232  // operations or turned into library calls.
233  setOperationAction(ISD::BITCAST, VT, Legal);
234  setOperationAction(ISD::LOAD, VT, Legal);
235  setOperationAction(ISD::STORE, VT, Legal);
236  setOperationAction(ISD::UNDEF, VT, Legal);
237 }
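// setAllExpand is used below for types that live in registers but have no
// real operation support: f32 without a VFP2 base register file, f64 without
// FP64, bf16, and the 64-bit MVE vector types. Everything is marked Expand
// first, then only bitcast/load/store/undef are re-legalized.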
238 
239 void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
240  LegalizeAction Action) {
241  setLoadExtAction(ISD::EXTLOAD, From, To, Action);
242  setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
243  setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
244 }
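// Example use, as seen in the MVE and NEON setup below:
//   addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
// marks the any-, zero- and sign-extending loads from v4i16 to v4i32 as legal
// in a single call.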
245 
246 void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
247  const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
248 
249  for (auto VT : IntTypes) {
250  addRegisterClass(VT, &ARM::MQPRRegClass);
251  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
252  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
253  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
254  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
255  setOperationAction(ISD::SHL, VT, Custom);
256  setOperationAction(ISD::SRA, VT, Custom);
257  setOperationAction(ISD::SRL, VT, Custom);
258  setOperationAction(ISD::SMIN, VT, Legal);
259  setOperationAction(ISD::SMAX, VT, Legal);
260  setOperationAction(ISD::UMIN, VT, Legal);
261  setOperationAction(ISD::UMAX, VT, Legal);
262  setOperationAction(ISD::ABS, VT, Legal);
263  setOperationAction(ISD::SETCC, VT, Custom);
264  setOperationAction(ISD::MLOAD, VT, Custom);
265  setOperationAction(ISD::MSTORE, VT, Legal);
266  setOperationAction(ISD::CTLZ, VT, Legal);
267  setOperationAction(ISD::CTTZ, VT, Custom);
268  setOperationAction(ISD::BITREVERSE, VT, Legal);
269  setOperationAction(ISD::BSWAP, VT, Legal);
270  setOperationAction(ISD::SADDSAT, VT, Legal);
271  setOperationAction(ISD::UADDSAT, VT, Legal);
272  setOperationAction(ISD::SSUBSAT, VT, Legal);
273  setOperationAction(ISD::USUBSAT, VT, Legal);
274  setOperationAction(ISD::ABDS, VT, Legal);
275  setOperationAction(ISD::ABDU, VT, Legal);
276  setOperationAction(ISD::AVGFLOORS, VT, Legal);
277  setOperationAction(ISD::AVGFLOORU, VT, Legal);
278  setOperationAction(ISD::AVGCEILS, VT, Legal);
279  setOperationAction(ISD::AVGCEILU, VT, Legal);
280 
281  // No native support for these.
282  setOperationAction(ISD::UDIV, VT, Expand);
283  setOperationAction(ISD::SDIV, VT, Expand);
284  setOperationAction(ISD::UREM, VT, Expand);
285  setOperationAction(ISD::SREM, VT, Expand);
286  setOperationAction(ISD::UDIVREM, VT, Expand);
287  setOperationAction(ISD::SDIVREM, VT, Expand);
288  setOperationAction(ISD::CTPOP, VT, Expand);
289  setOperationAction(ISD::SELECT, VT, Expand);
290  setOperationAction(ISD::SELECT_CC, VT, Expand);
291 
292  // Vector reductions
293  setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
294  setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
295  setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
296  setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
297  setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
298  setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
299  setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
300  setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
301  setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
302 
303  if (!HasMVEFP) {
304  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
305  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
306  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
307  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
308  } else {
309  setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
310  setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
311  }
312 
313  // Pre and Post inc are supported on loads and stores
314  for (unsigned im = (unsigned)ISD::PRE_INC;
315  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
316  setIndexedLoadAction(im, VT, Legal);
317  setIndexedStoreAction(im, VT, Legal);
318  setIndexedMaskedLoadAction(im, VT, Legal);
319  setIndexedMaskedStoreAction(im, VT, Legal);
320  }
321  }
322 
323  const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
324  for (auto VT : FloatTypes) {
325  addRegisterClass(VT, &ARM::MQPRRegClass);
326  if (!HasMVEFP)
327  setAllExpand(VT);
328 
329  // These are legal or custom whether or not we have MVE.fp
330  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
331  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
332  setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
333  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
334  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
335  setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
336  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
337  setOperationAction(ISD::SETCC, VT, Custom);
338  setOperationAction(ISD::MLOAD, VT, Custom);
339  setOperationAction(ISD::MSTORE, VT, Legal);
340  setOperationAction(ISD::SELECT, VT, Expand);
341  setOperationAction(ISD::SELECT_CC, VT, Expand);
342 
343  // Pre and Post inc are supported on loads and stores
344  for (unsigned im = (unsigned)ISD::PRE_INC;
345  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
346  setIndexedLoadAction(im, VT, Legal);
347  setIndexedStoreAction(im, VT, Legal);
348  setIndexedMaskedLoadAction(im, VT, Legal);
349  setIndexedMaskedStoreAction(im, VT, Legal);
350  }
351 
352  if (HasMVEFP) {
353  setOperationAction(ISD::FMINNUM, VT, Legal);
354  setOperationAction(ISD::FMAXNUM, VT, Legal);
355  setOperationAction(ISD::FROUND, VT, Legal);
356  setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
357  setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
358  setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
359  setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
360 
361  // No native support for these.
362  setOperationAction(ISD::FDIV, VT, Expand);
363  setOperationAction(ISD::FREM, VT, Expand);
364  setOperationAction(ISD::FSQRT, VT, Expand);
365  setOperationAction(ISD::FSIN, VT, Expand);
366  setOperationAction(ISD::FCOS, VT, Expand);
367  setOperationAction(ISD::FPOW, VT, Expand);
368  setOperationAction(ISD::FLOG, VT, Expand);
369  setOperationAction(ISD::FLOG2, VT, Expand);
370  setOperationAction(ISD::FLOG10, VT, Expand);
371  setOperationAction(ISD::FEXP, VT, Expand);
372  setOperationAction(ISD::FEXP2, VT, Expand);
373  setOperationAction(ISD::FNEARBYINT, VT, Expand);
374  }
375  }
376 
377  // Custom-expand smaller-than-legal vector reductions to prevent false zero
378  // items from being added.
379  setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);
380  setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);
381  setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);
382  setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);
383  setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
384  setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);
385  setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
386  setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);
387 
388  // We 'support' these types up to bitcast/load/store level, regardless of
389  // MVE integer-only / float support. Only FP data processing on the FP
390  // vector types is inhibited at the integer-only level.
391  const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
392  for (auto VT : LongTypes) {
393  addRegisterClass(VT, &ARM::MQPRRegClass);
394  setAllExpand(VT);
395  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
396  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
397  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
398  setOperationAction(ISD::VSELECT, VT, Legal);
399  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
400  }
401  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
402 
403  // We can do bitwise operations on v2i64 vectors
404  setOperationAction(ISD::AND, MVT::v2i64, Legal);
405  setOperationAction(ISD::OR, MVT::v2i64, Legal);
406  setOperationAction(ISD::XOR, MVT::v2i64, Legal);
407 
408  // It is legal to extload from v4i8 to v4i16 or v4i32.
409  addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
410  addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
411  addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
412 
413  // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
414  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
415  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
416  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
417  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
418  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
419 
420  // Some truncating stores are legal too.
421  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
422  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
423  setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
424 
425  // Pre and Post inc on these are legal, given the correct extends
426  for (unsigned im = (unsigned)ISD::PRE_INC;
427  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
428  for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
429  setIndexedLoadAction(im, VT, Legal);
430  setIndexedStoreAction(im, VT, Legal);
431  setIndexedMaskedLoadAction(im, VT, Legal);
432  setIndexedMaskedStoreAction(im, VT, Legal);
433  }
434  }
435 
436  // Predicate types
437  const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
438  for (auto VT : pTypes) {
439  addRegisterClass(VT, &ARM::VCCRRegClass);
440  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
441  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
442  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
443  setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
444  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
445  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
446  setOperationAction(ISD::SETCC, VT, Custom);
447  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
448  setOperationAction(ISD::LOAD, VT, Custom);
449  setOperationAction(ISD::STORE, VT, Custom);
450  setOperationAction(ISD::TRUNCATE, VT, Custom);
451  setOperationAction(ISD::VSELECT, VT, Expand);
452  setOperationAction(ISD::SELECT, VT, Expand);
453  }
454  setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
455  setOperationAction(ISD::TRUNCATE, MVT::v2i1, Expand);
456  setOperationAction(ISD::AND, MVT::v2i1, Expand);
457  setOperationAction(ISD::OR, MVT::v2i1, Expand);
458  setOperationAction(ISD::XOR, MVT::v2i1, Expand);
459  setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Expand);
460  setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Expand);
461  setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Expand);
462  setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Expand);
463 
464  setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
465  setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
466  setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
467  setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
468  setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
469  setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
470  setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
471  setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
472 }
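// Summary of the MVE type setup above: v16i8/v8i16/v4i32 (and, with MVE.fp,
// v8f16/v4f32) live in the 128-bit MQPR Q registers; v2i64/v2f64 are
// register-legal but all arithmetic on them is expanded; and the predicate
// types v16i1/v8i1/v4i1/v2i1 live in VCCR and are mostly custom-lowered.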
473 
475  const ARMSubtarget &STI)
476  : TargetLowering(TM), Subtarget(&STI) {
477  RegInfo = Subtarget->getRegisterInfo();
478  Itins = Subtarget->getInstrItineraryData();
479 
482 
483  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
484  !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
485  bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
486  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
487  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
488  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
490  }
491 
492  if (Subtarget->isTargetMachO()) {
493  // Uses VFP for Thumb libfuncs if available.
494  if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
495  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
496  static const struct {
497  const RTLIB::Libcall Op;
498  const char * const Name;
499  const ISD::CondCode Cond;
500  } LibraryCalls[] = {
501  // Single-precision floating-point arithmetic.
502  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
503  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
504  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
505  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
506 
507  // Double-precision floating-point arithmetic.
508  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
509  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
510  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
511  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
512 
513  // Single-precision comparisons.
514  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
515  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
516  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
517  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
518  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
519  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
520  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
521 
522  // Double-precision comparisons.
523  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
524  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
525  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
526  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
527  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
528  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
529  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
530 
531  // Floating-point to integer conversions.
532  // i64 conversions are done via library routines even when generating VFP
533  // instructions, so use the same ones.
534  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
535  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
536  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
537  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
538 
539  // Conversions between floating types.
540  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
541  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
542 
543  // Integer to floating-point conversions.
544  // i64 conversions are done via library routines even when generating VFP
545  // instructions, so use the same ones.
546  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
547  // e.g., __floatunsidf vs. __floatunssidfvfp.
548  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
549  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
550  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
551  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
552  };
553 
554  for (const auto &LC : LibraryCalls) {
555  setLibcallName(LC.Op, LC.Name);
556  if (LC.Cond != ISD::SETCC_INVALID)
557  setCmpLibcallCC(LC.Op, LC.Cond);
558  }
559  }
560  }
561 
562  // These libcalls are not available in 32-bit mode.
563  setLibcallName(RTLIB::SHL_I128, nullptr);
564  setLibcallName(RTLIB::SRL_I128, nullptr);
565  setLibcallName(RTLIB::SRA_I128, nullptr);
566  setLibcallName(RTLIB::MUL_I128, nullptr);
567  setLibcallName(RTLIB::MULO_I64, nullptr);
568  setLibcallName(RTLIB::MULO_I128, nullptr);
569 
570  // RTLIB
571  if (Subtarget->isAAPCS_ABI() &&
572  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
573  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
574  static const struct {
575  const RTLIB::Libcall Op;
576  const char * const Name;
577  const CallingConv::ID CC;
578  const ISD::CondCode Cond;
579  } LibraryCalls[] = {
580  // Double-precision floating-point arithmetic helper functions
581  // RTABI chapter 4.1.2, Table 2
582  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
583  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
584  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
585  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
586 
587  // Double-precision floating-point comparison helper functions
588  // RTABI chapter 4.1.2, Table 3
589  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
590  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
591  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
592  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
593  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
594  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
595  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
596 
597  // Single-precision floating-point arithmetic helper functions
598  // RTABI chapter 4.1.2, Table 4
599  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
600  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
601  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
602  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
603 
604  // Single-precision floating-point comparison helper functions
605  // RTABI chapter 4.1.2, Table 5
606  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
607  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
608  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
609  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
610  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
611  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
612  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
613 
614  // Floating-point to integer conversions.
615  // RTABI chapter 4.1.2, Table 6
616  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
617  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
618  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
619  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
620  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
621  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
622  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
623  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
624 
625  // Conversions between floating types.
626  // RTABI chapter 4.1.2, Table 7
627  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
628  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
629  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
630 
631  // Integer to floating-point conversions.
632  // RTABI chapter 4.1.2, Table 8
633  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
634  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
635  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
636  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
639  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
640  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
641 
642  // Long long helper functions
643  // RTABI chapter 4.2, Table 9
644  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
645  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
646  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
647  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
648 
649  // Integer division functions
650  // RTABI chapter 4.3.1
651  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
652  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
653  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
654  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
655  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
656  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
657  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
658  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
659  };
660 
661  for (const auto &LC : LibraryCalls) {
662  setLibcallName(LC.Op, LC.Name);
663  setLibcallCallingConv(LC.Op, LC.CC);
664  if (LC.Cond != ISD::SETCC_INVALID)
665  setCmpLibcallCC(LC.Op, LC.Cond);
666  }
667 
668  // EABI dependent RTLIB
669  if (TM.Options.EABIVersion == EABI::EABI4 ||
670  TM.Options.EABIVersion == EABI::EABI5) {
671  static const struct {
672  const RTLIB::Libcall Op;
673  const char *const Name;
674  const CallingConv::ID CC;
675  const ISD::CondCode Cond;
676  } MemOpsLibraryCalls[] = {
677  // Memory operations
678  // RTABI chapter 4.3.4
680  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
681  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
682  };
683 
684  for (const auto &LC : MemOpsLibraryCalls) {
685  setLibcallName(LC.Op, LC.Name);
686  setLibcallCallingConv(LC.Op, LC.CC);
687  if (LC.Cond != ISD::SETCC_INVALID)
688  setCmpLibcallCC(LC.Op, LC.Cond);
689  }
690  }
691  }
692 
693  if (Subtarget->isTargetWindows()) {
694  static const struct {
695  const RTLIB::Libcall Op;
696  const char * const Name;
697  const CallingConv::ID CC;
698  } LibraryCalls[] = {
699  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
700  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
701  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
702  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
703  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
704  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
705  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
706  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
707  };
708 
709  for (const auto &LC : LibraryCalls) {
710  setLibcallName(LC.Op, LC.Name);
711  setLibcallCallingConv(LC.Op, LC.CC);
712  }
713  }
714 
715  // Use divmod compiler-rt calls for iOS 5.0 and later.
716  if (Subtarget->isTargetMachO() &&
717  !(Subtarget->isTargetIOS() &&
718  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
719  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
720  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
721  }
722 
723  // The half <-> float conversion functions are always soft-float on
724  // non-watchOS platforms, but are needed for some targets which use a
725  // hard-float calling convention by default.
726  if (!Subtarget->isTargetWatchABI()) {
727  if (Subtarget->isAAPCS_ABI()) {
728  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
729  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
730  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
731  } else {
732  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
733  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
734  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
735  }
736  }
737 
738  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
739  // a __gnu_ prefix (which is the default).
740  if (Subtarget->isTargetAEABI()) {
741  static const struct {
742  const RTLIB::Libcall Op;
743  const char * const Name;
744  const CallingConv::ID CC;
745  } LibraryCalls[] = {
746  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
747  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
748  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
749  };
750 
751  for (const auto &LC : LibraryCalls) {
752  setLibcallName(LC.Op, LC.Name);
753  setLibcallCallingConv(LC.Op, LC.CC);
754  }
755  }
756 
757  if (Subtarget->isThumb1Only())
758  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
759  else
760  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
761 
762  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
763  Subtarget->hasFPRegs()) {
764  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
765  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
766 
771 
772  if (!Subtarget->hasVFP2Base())
773  setAllExpand(MVT::f32);
774  if (!Subtarget->hasFP64())
775  setAllExpand(MVT::f64);
776  }
777 
778  if (Subtarget->hasFullFP16()) {
779  addRegisterClass(MVT::f16, &ARM::HPRRegClass);
782 
785  }
786 
787  if (Subtarget->hasBF16()) {
788  addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
789  setAllExpand(MVT::bf16);
790  if (!Subtarget->hasFullFP16())
792  }
793 
794  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
795  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
796  setTruncStoreAction(VT, InnerVT, Expand);
797  addAllExtLoads(VT, InnerVT, Expand);
798  }
799 
802 
804  }
805 
808 
811 
812  if (Subtarget->hasMVEIntegerOps())
813  addMVEVectorTypes(Subtarget->hasMVEFloatOps());
814 
815  // Combine low-overhead loop intrinsics so that we can lower i1 types.
816  if (Subtarget->hasLOB()) {
818  }
819 
820  if (Subtarget->hasNEON()) {
821  addDRTypeForNEON(MVT::v2f32);
822  addDRTypeForNEON(MVT::v8i8);
823  addDRTypeForNEON(MVT::v4i16);
824  addDRTypeForNEON(MVT::v2i32);
825  addDRTypeForNEON(MVT::v1i64);
826 
827  addQRTypeForNEON(MVT::v4f32);
828  addQRTypeForNEON(MVT::v2f64);
829  addQRTypeForNEON(MVT::v16i8);
830  addQRTypeForNEON(MVT::v8i16);
831  addQRTypeForNEON(MVT::v4i32);
832  addQRTypeForNEON(MVT::v2i64);
833 
834  if (Subtarget->hasFullFP16()) {
835  addQRTypeForNEON(MVT::v8f16);
836  addDRTypeForNEON(MVT::v4f16);
837  }
838 
839  if (Subtarget->hasBF16()) {
840  addQRTypeForNEON(MVT::v8bf16);
841  addDRTypeForNEON(MVT::v4bf16);
842  }
843  }
844 
845  if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
846  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
847  // none of Neon, MVE or VFP supports any arithmetic operations on it.
851  // FIXME: Code duplication: FDIV and FREM are expanded always, see
852  // ARMTargetLowering::addTypeForNEON method for details.
855  // FIXME: Create unittest.
856  // In other words, find a way to handle cases where "copysign" appears in the
857  // DAG with vector operands.
859  // FIXME: Code duplication: SETCC has custom operation action, see
860  // ARMTargetLowering::addTypeForNEON method for details.
862  // FIXME: Create unittest for FNEG and for FABS.
874  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
881  }
882 
883  if (Subtarget->hasNEON()) {
884  // The same applies to v4f32, but keep in mind that vadd, vsub and vmul are
885  // natively supported for v4f32.
900 
901  // Mark v2f32 intrinsics.
916 
917  // Neon does not support some operations on v1i64 and v2i64 types.
919  // Custom handling for some quad-vector types to detect VMULL.
923  // Custom handling for some vector types to avoid expensive expansions
928  // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
929  // a destination type that is wider than the source, nor does it have a
930  // FP_TO_[SU]INT instruction with a destination narrower than the
931  // source.
940 
943 
944  // NEON does not have a single-instruction CTPOP for vectors with element
945  // types wider than 8 bits. However, custom lowering can leverage the
946  // v8i8/v16i8 vcnt instruction.
953 
956 
957  // NEON does not have a single-instruction CTTZ for vectors.
962 
967 
972 
977 
978  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
981  }
982 
983  // NEON only has FMA instructions as of VFP4.
984  if (!Subtarget->hasVFP4Base()) {
987  }
988 
991 
992  // It is legal to extload from v4i8 to v4i16 or v4i32.
994  MVT::v2i32}) {
999  }
1000  }
1001  }
1002 
1003  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1010  }
1011  if (Subtarget->hasMVEIntegerOps()) {
1014  ISD::SETCC});
1015  }
1016  if (Subtarget->hasMVEFloatOps()) {
1018  }
1019 
1020  if (!Subtarget->hasFP64()) {
1021  // When targeting a floating-point unit with only single-precision
1022  // operations, f64 is legal for the few double-precision instructions which
1023  // are present. However, no double-precision operations other than moves,
1024  // loads and stores are provided by the hardware.
1061  }
1062 
1063  if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1066  if (Subtarget->hasFullFP16()) {
1069  }
1070  }
1071 
1072  if (!Subtarget->hasFP16()) {
1075  }
1076 
1078 
1079  // ARM does not have floating-point extending loads.
1080  for (MVT VT : MVT::fp_valuetypes()) {
1083  }
1084 
1085  // ... or truncating stores
1089 
1090  // ARM does not have i1 sign extending load.
1091  for (MVT VT : MVT::integer_valuetypes())
1093 
1094  // ARM supports all 4 flavors of integer indexed load / store.
1095  if (!Subtarget->isThumb1Only()) {
1096  for (unsigned im = (unsigned)ISD::PRE_INC;
1097  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1106  }
1107  } else {
1108  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1111  }
1112 
1117 
1120  if (Subtarget->hasDSP()) {
1129  }
1130  if (Subtarget->hasBaseDSP()) {
1133  }
1134 
1135  // i64 operation support.
1138  if (Subtarget->isThumb1Only()) {
1141  }
1142  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1143  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1145 
1155 
1156  // MVE lowers 64 bit shifts to lsll and lsrl
1157  // assuming that ISD::SRL and SRA of i64 are already marked custom
1158  if (Subtarget->hasMVEIntegerOps())
1160 
1161  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1162  if (Subtarget->isThumb1Only()) {
1166  }
1167 
1168  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1170 
1171  // ARM does not have ROTL.
1173  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1176  }
1179  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1182  }
1183 
1184  // @llvm.readcyclecounter requires the Performance Monitors extension.
1185  // Default to the 0 expansion on unsupported platforms.
1186  // FIXME: Technically there are older ARM CPUs that have
1187  // implementation-specific ways of obtaining this information.
1188  if (Subtarget->hasPerfMon())
1190 
1191  // Only ARMv6 has BSWAP.
1192  if (!Subtarget->hasV6Ops())
1194 
1195  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1196  : Subtarget->hasDivideInARMMode();
1197  if (!hasDivide) {
1198  // These are expanded into libcalls if the cpu doesn't have HW divider.
1201  }
1202 
1203  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1206 
1209  }
1210 
1213 
1214  // Register based DivRem for AEABI (RTABI 4.2)
1215  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1216  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1217  Subtarget->isTargetWindows()) {
1220  HasStandaloneRem = false;
1221 
1222  if (Subtarget->isTargetWindows()) {
1223  const struct {
1224  const RTLIB::Libcall Op;
1225  const char * const Name;
1226  const CallingConv::ID CC;
1227  } LibraryCalls[] = {
1228  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1229  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1230  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1231  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1232 
1233  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1234  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1235  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1236  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1237  };
1238 
1239  for (const auto &LC : LibraryCalls) {
1240  setLibcallName(LC.Op, LC.Name);
1241  setLibcallCallingConv(LC.Op, LC.CC);
1242  }
1243  } else {
1244  const struct {
1245  const RTLIB::Libcall Op;
1246  const char * const Name;
1247  const CallingConv::ID CC;
1248  } LibraryCalls[] = {
1249  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1250  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1251  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1252  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1253 
1254  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1255  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1256  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1257  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1258  };
1259 
1260  for (const auto &LC : LibraryCalls) {
1261  setLibcallName(LC.Op, LC.Name);
1262  setLibcallCallingConv(LC.Op, LC.CC);
1263  }
1264  }
1265 
1270  } else {
1273  }
1274 
1275  if (Subtarget->getTargetTriple().isOSMSVCRT()) {
1276  // MSVCRT doesn't have powi; fall back to pow
1277  setLibcallName(RTLIB::POWI_F32, nullptr);
1278  setLibcallName(RTLIB::POWI_F64, nullptr);
1279  }
1280 
1285 
1288 
1289  // Use the default implementation.
1296 
1297  if (Subtarget->isTargetWindows())
1299  else
1301 
1302  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1303  // the default expansion.
1304  InsertFencesForAtomic = false;
1305  if (Subtarget->hasAnyDataBarrier() &&
1306  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1307  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1308  // to ldrex/strex loops already.
1310  if (!Subtarget->isThumb() || !Subtarget->isMClass())
1312 
1313  // On v8, we have particularly efficient implementations of atomic fences
1314  // if they can be combined with nearby atomic loads and stores.
1315  if (!Subtarget->hasAcquireRelease() ||
1316  getTargetMachine().getOptLevel() == 0) {
1317  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1318  InsertFencesForAtomic = true;
1319  }
1320  } else {
1321  // If there's anything we can use as a barrier, go through custom lowering
1322  // for ATOMIC_FENCE.
1323  // If target has DMB in thumb, Fences can be inserted.
1324  if (Subtarget->hasDataBarrier())
1325  InsertFencesForAtomic = true;
1326 
1328  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1329 
1330  // Set them all for expansion, which will force libcalls.
1343  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1344  // Unordered/Monotonic case.
1345  if (!InsertFencesForAtomic) {
1348  }
1349  }
1350 
1351  // Compute supported atomic widths.
1352  if (Subtarget->isTargetLinux() ||
1353  (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1354  // For targets where __sync_* routines are reliably available, we use them
1355  // if necessary.
1356  //
1357  // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1358  // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1359  //
1360  // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
1361  // such targets should provide __sync_* routines, which use the ARM mode
1362  // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
1363  // encoding; see ARMISD::MEMBARRIER_MCR.)
1365  } else if (Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) {
1366  // Cortex-M (besides Cortex-M0) have 32-bit atomics.
1368  } else {
1369  // We can't assume anything about other targets; just use libatomic
1370  // routines.
1372  }
1373 
1375 
1376  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1377  if (!Subtarget->hasV6Ops()) {
1380  }
1382 
1383  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1384  !Subtarget->isThumb1Only()) {
1385  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1386  // iff target supports vfp2.
1390  }
1391 
1392  // We want to custom lower some of our intrinsics.
1397  if (Subtarget->useSjLjEH())
1398  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1399 
1409  if (Subtarget->hasFullFP16()) {
1413  }
1414 
1416 
1419  if (Subtarget->hasFullFP16())
1424 
1425  // We don't support sin/cos/fmod/copysign/pow
1434  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1435  !Subtarget->isThumb1Only()) {
1438  }
1441 
1442  if (!Subtarget->hasVFP4Base()) {
1445  }
1446 
1447  // Various VFP goodness
1448  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1449  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1450  if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1453  }
1454 
1455  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1456  if (!Subtarget->hasFP16()) {
1459  }
1460 
1461  // Strict floating-point comparisons need custom lowering.
1468  }
1469 
1470  // Use __sincos_stret if available.
1471  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1472  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1475  }
1476 
1477  // FP-ARMv8 implements a lot of rounding-like FP operations.
1478  if (Subtarget->hasFPARMv8Base()) {
1487  if (Subtarget->hasNEON()) {
1492  }
1493 
1494  if (Subtarget->hasFP64()) {
1503  }
1504  }
1505 
1506  // FP16 often needs to be promoted to call lib functions
1507  if (Subtarget->hasFullFP16()) {
1520 
1522  }
1523 
1524  if (Subtarget->hasNEON()) {
1525  // vmin and vmax aren't available in a scalar form, so we can use
1526  // a NEON instruction with an undef lane instead. This has a performance
1527  // penalty on some cores, so we don't do this unless we have been
1528  // asked to by the core tuning model.
1529  if (Subtarget->useNEONForSinglePrecisionFP()) {
1534  }
1539 
1540  if (Subtarget->hasFullFP16()) {
1545 
1550  }
1551  }
1552 
1553  // We have target-specific dag combine patterns for the following nodes:
1554  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1557 
1558  if (Subtarget->hasMVEIntegerOps())
1560 
1561  if (Subtarget->hasV6Ops())
1563  if (Subtarget->isThumb1Only())
1565  // Attempt to lower smin/smax to ssat/usat
1566  if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1567  Subtarget->isThumb2()) {
1569  }
1570 
1572 
1573  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1574  !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1576  else
1578 
1579  //// temporary - rewrite interface to use type
1580  MaxStoresPerMemset = 8;
1582  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1584  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1586 
1587  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1588  // are at least 4 bytes aligned.
1590 
1591  // Prefer likely predicted branches to selects on out-of-order cores.
1592  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1593 
1594  setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1595 
1596  setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1597 
1598  if (Subtarget->isThumb() || Subtarget->isThumb2())
1600 }
1601 
1603  return Subtarget->useSoftFloat();
1604 }
1605 
1606 // FIXME: It might make sense to define the representative register class as the
1607 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1608 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1609 // SPR's representative would be DPR_VFP2. This should work well if register
1610 // pressure tracking were modified such that a register use would increment the
1611 // pressure of the register class's representative and all of its super
1612 // classes' representatives transitively. We have not implemented this because
1613 // of the difficulty prior to coalescing of modeling operand register classes
1614 // due to the common occurrence of cross class copies and subregister insertions
1615 // and extractions.
1616 std::pair<const TargetRegisterClass *, uint8_t>
1618  MVT VT) const {
1619  const TargetRegisterClass *RRC = nullptr;
1620  uint8_t Cost = 1;
1621  switch (VT.SimpleTy) {
1622  default:
1624  // Use DPR as representative register class for all floating point
1625  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1626  // the cost is 1 for both f32 and f64.
1627  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1628  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1629  RRC = &ARM::DPRRegClass;
1630  // When NEON is used for SP, only half of the register file is available
1631  // because operations that define both SP and DP results will be constrained
1632  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1633  // coalescing by double-counting the SP regs. See the FIXME above.
1634  if (Subtarget->useNEONForSinglePrecisionFP())
1635  Cost = 2;
1636  break;
1637  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1638  case MVT::v4f32: case MVT::v2f64:
1639  RRC = &ARM::DPRRegClass;
1640  Cost = 2;
1641  break;
1642  case MVT::v4i64:
1643  RRC = &ARM::DPRRegClass;
1644  Cost = 4;
1645  break;
1646  case MVT::v8i64:
1647  RRC = &ARM::DPRRegClass;
1648  Cost = 8;
1649  break;
1650  }
1651  return std::make_pair(RRC, Cost);
1652 }
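// For example, this returns {&ARM::DPRRegClass, 2} for MVT::v4f32 and
// {&ARM::DPRRegClass, 8} for MVT::v8i64; f32 and f64 cost 2 instead of 1
// when NEON is used for single-precision FP (see the comment above).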
1653 
1654 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1655 #define MAKE_CASE(V) \
1656  case V: \
1657  return #V;
1658  switch ((ARMISD::NodeType)Opcode) {
1659  case ARMISD::FIRST_NUMBER:
1660  break;
1864 #undef MAKE_CASE
1865  }
1866  return nullptr;
1867 }
1868 
1870  EVT VT) const {
1871  if (!VT.isVector())
1872  return getPointerTy(DL);
1873 
1874  // MVE has a predicate register.
1875  if ((Subtarget->hasMVEIntegerOps() &&
1876  (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
1877  VT == MVT::v16i8)) ||
1878  (Subtarget->hasMVEFloatOps() &&
1879  (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
1882 }
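// With MVE, a compare of one of the vector types listed above naturally
// produces the matching predicate vector in VCCR (e.g. v4i1 for v4i32); see
// the predicate-type setup in addMVEVectorTypes.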
1883 
1884 /// getRegClassFor - Return the register class that should be used for the
1885 /// specified value type.
1886 const TargetRegisterClass *
1887 ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1888  (void)isDivergent;
1889  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1890  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1891  // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1892  // MVE Q registers.
1893  if (Subtarget->hasNEON()) {
1894  if (VT == MVT::v4i64)
1895  return &ARM::QQPRRegClass;
1896  if (VT == MVT::v8i64)
1897  return &ARM::QQQQPRRegClass;
1898  }
1899  if (Subtarget->hasMVEIntegerOps()) {
1900  if (VT == MVT::v4i64)
1901  return &ARM::MQQPRRegClass;
1902  if (VT == MVT::v8i64)
1903  return &ARM::MQQQQPRRegClass;
1904  }
1905  return TargetLowering::getRegClassFor(VT);
1906 }
1907 
1908 // memcpy and other memory intrinsics typically try to use LDM/STM if the
1909 // source/dest is aligned and the copy size is large enough. We therefore want
1910 // to align such objects passed to memory intrinsics.
1912  Align &PrefAlign) const {
1913  if (!isa<MemIntrinsic>(CI))
1914  return false;
1915  MinSize = 8;
1916  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1917  // cycle faster than 4-byte aligned LDM.
1918  PrefAlign =
1919  (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
1920  return true;
1921 }
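// Callers use MinSize and PrefAlign to raise the alignment of sufficiently
// large objects passed to memory intrinsics, e.g. an 8+ byte alloca handed to
// llvm.memcpy can be realigned to 8 bytes on v6 and later non-M-class cores
// so the faster 8-byte-aligned LDM sequences become usable.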
1922 
1923 // Create a fast isel object.
1924 FastISel *
1926  const TargetLibraryInfo *libInfo) const {
1927  return ARM::createFastISel(funcInfo, libInfo);
1928 }
1929 
1931  unsigned NumVals = N->getNumValues();
1932  if (!NumVals)
1933  return Sched::RegPressure;
1934 
1935  for (unsigned i = 0; i != NumVals; ++i) {
1936  EVT VT = N->getValueType(i);
1937  if (VT == MVT::Glue || VT == MVT::Other)
1938  continue;
1939  if (VT.isFloatingPoint() || VT.isVector())
1940  return Sched::ILP;
1941  }
1942 
1943  if (!N->isMachineOpcode())
1944  return Sched::RegPressure;
1945 
1946  // Loads are scheduled for latency even if the instruction itinerary
1947  // is not available.
1948  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1949  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1950 
1951  if (MCID.getNumDefs() == 0)
1952  return Sched::RegPressure;
1953  if (!Itins->isEmpty() &&
1954  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1955  return Sched::ILP;
1956 
1957  return Sched::RegPressure;
1958 }
1959 
1960 //===----------------------------------------------------------------------===//
1961 // Lowering Code
1962 //===----------------------------------------------------------------------===//
1963 
1964 static bool isSRL16(const SDValue &Op) {
1965  if (Op.getOpcode() != ISD::SRL)
1966  return false;
1967  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1968  return Const->getZExtValue() == 16;
1969  return false;
1970 }
1971 
1972 static bool isSRA16(const SDValue &Op) {
1973  if (Op.getOpcode() != ISD::SRA)
1974  return false;
1975  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1976  return Const->getZExtValue() == 16;
1977  return false;
1978 }
1979 
1980 static bool isSHL16(const SDValue &Op) {
1981  if (Op.getOpcode() != ISD::SHL)
1982  return false;
1983  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1984  return Const->getZExtValue() == 16;
1985  return false;
1986 }
1987 
1988 // Check for a signed 16-bit value. We special-case SRA because it keeps
1989 // things simpler when also looking for SRAs that aren't sign-extending a
1990 // smaller value. Without the check, we'd need to take extra care with
1991 // checking order for some operations.
1992 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1993  if (isSRA16(Op))
1994  return isSHL16(Op.getOperand(0));
1995  return DAG.ComputeNumSignBits(Op) == 17;
1996 }
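// For example, (sra (shl x, 16), 16) is accepted as a signed 16-bit value, as
// is any node for which the DAG computes 17 sign bits (bits 31..15 all equal,
// so the value fits in the low 16 bits).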
1997 
1998 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
2000  switch (CC) {
2001  default: llvm_unreachable("Unknown condition code!");
2002  case ISD::SETNE: return ARMCC::NE;
2003  case ISD::SETEQ: return ARMCC::EQ;
2004  case ISD::SETGT: return ARMCC::GT;
2005  case ISD::SETGE: return ARMCC::GE;
2006  case ISD::SETLT: return ARMCC::LT;
2007  case ISD::SETLE: return ARMCC::LE;
2008  case ISD::SETUGT: return ARMCC::HI;
2009  case ISD::SETUGE: return ARMCC::HS;
2010  case ISD::SETULT: return ARMCC::LO;
2011  case ISD::SETULE: return ARMCC::LS;
2012  }
2013 }
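// For example, ISD::SETUGT maps to ARMCC::HI (unsigned higher: C set and Z
// clear), which predicates the instruction that follows the CMP.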
2014 
2015 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
2017  ARMCC::CondCodes &CondCode2) {
2018  CondCode2 = ARMCC::AL;
2019  switch (CC) {
2020  default: llvm_unreachable("Unknown FP condition!");
2021  case ISD::SETEQ:
2022  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
2023  case ISD::SETGT:
2024  case ISD::SETOGT: CondCode = ARMCC::GT; break;
2025  case ISD::SETGE:
2026  case ISD::SETOGE: CondCode = ARMCC::GE; break;
2027  case ISD::SETOLT: CondCode = ARMCC::MI; break;
2028  case ISD::SETOLE: CondCode = ARMCC::LS; break;
2029  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
2030  case ISD::SETO: CondCode = ARMCC::VC; break;
2031  case ISD::SETUO: CondCode = ARMCC::VS; break;
2032  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
2033  case ISD::SETUGT: CondCode = ARMCC::HI; break;
2034  case ISD::SETUGE: CondCode = ARMCC::PL; break;
2035  case ISD::SETLT:
2036  case ISD::SETULT: CondCode = ARMCC::LT; break;
2037  case ISD::SETLE:
2038  case ISD::SETULE: CondCode = ARMCC::LE; break;
2039  case ISD::SETNE:
2040  case ISD::SETUNE: CondCode = ARMCC::NE; break;
2041  }
2042 }
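// Unordered-aware conditions may need two ARM predicates: e.g. ISD::SETONE
// yields ARMCC::MI with CondCode2 == ARMCC::GT, and callers emit a second
// conditional instruction whenever CondCode2 != ARMCC::AL.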
2043 
2044 //===----------------------------------------------------------------------===//
2045 // Calling Convention Implementation
2046 //===----------------------------------------------------------------------===//
2047 
2048 /// getEffectiveCallingConv - Get the effective calling convention, taking into
2049 /// account presence of floating point hardware and calling convention
2050 /// limitations, such as support for variadic functions.
2052 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
2053  bool isVarArg) const {
2054  switch (CC) {
2055  default:
2056  report_fatal_error("Unsupported calling convention");
2058  case CallingConv::ARM_APCS:
2059  case CallingConv::GHC:
2061  return CC;
2065  case CallingConv::Swift:
2068  case CallingConv::C:
2069  case CallingConv::Tail:
2070  if (!Subtarget->isAAPCS_ABI())
2071  return CallingConv::ARM_APCS;
2072  else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
2073  getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2074  !isVarArg)
2075  return CallingConv::ARM_AAPCS_VFP;
2076  else
2077  return CallingConv::ARM_AAPCS;
2078  case CallingConv::Fast:
2079  case CallingConv::CXX_FAST_TLS:
2080  if (!Subtarget->isAAPCS_ABI()) {
2081  if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2082  return CallingConv::Fast;
2083  return CallingConv::ARM_APCS;
2084  } else if (Subtarget->hasVFP2Base() &&
2085  !Subtarget->isThumb1Only() && !isVarArg)
2086  return CallingConv::ARM_AAPCS_VFP;
2087  else
2088  return CallingConv::ARM_AAPCS;
2089  }
2090 }
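// Editor's worked example (not in the original source): on an AAPCS target
// with a VFP2 FPU and a hard-float ABI, a plain CallingConv::C call is
// treated as ARM_AAPCS_VFP, so FP arguments travel in s/d registers; the same
// call made variadic falls back to ARM_AAPCS and passes FP values in integer
// registers or on the stack. On a pre-AAPCS (APCS) target the conventions
// here collapse to ARM_APCS (or Fast, for non-variadic fastcc with VFP).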
2091 
2092 CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2093  bool isVarArg) const {
2094  return CCAssignFnForNode(CC, false, isVarArg);
2095 }
2096 
2097 CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
2098  bool isVarArg) const {
2099  return CCAssignFnForNode(CC, true, isVarArg);
2100 }
2101 
2102 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2103 /// CallingConvention.
2104 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2105  bool Return,
2106  bool isVarArg) const {
2107  switch (getEffectiveCallingConv(CC, isVarArg)) {
2108  default:
2109  report_fatal_error("Unsupported calling convention");
2110  case CallingConv::ARM_APCS:
2111  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2112  case CallingConv::ARM_AAPCS:
2113  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2114  case CallingConv::ARM_AAPCS_VFP:
2115  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2116  case CallingConv::Fast:
2117  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2118  case CallingConv::GHC:
2119  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2120  case CallingConv::PreserveMost:
2121  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2122  case CallingConv::CFGuard_Check:
2123  return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2124  }
2125 }
2126 
2127 SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2128  MVT LocVT, MVT ValVT, SDValue Val) const {
2129  Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2130  Val);
2131  if (Subtarget->hasFullFP16()) {
2132  Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2133  } else {
2134  Val = DAG.getNode(ISD::TRUNCATE, dl,
2135  MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2136  Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2137  }
2138  return Val;
2139 }
2140 
2141 SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2142  MVT LocVT, MVT ValVT,
2143  SDValue Val) const {
2144  if (Subtarget->hasFullFP16()) {
2145  Val = DAG.getNode(ARMISD::VMOVrh, dl,
2146  MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2147  } else {
2148  Val = DAG.getNode(ISD::BITCAST, dl,
2149  MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2150  Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2151  MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2152  }
2153  return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2154 }
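// Editor's note (illustrative sketch): MoveToHPR/MoveFromHPR bridge between
// the location type chosen by the calling convention (e.g. f32 or i32) and an
// f16/bf16 value type. With +fullfp16 an incoming f32 location is recovered as
//   (VMOVhr (bitcast i32 <loc>))
// whereas without it the same value becomes
//   (bitcast f16 (truncate i16 (bitcast i32 <loc>)))
// MoveFromHPR builds the reverse sequence when an f16 value must be widened
// back into its 32-bit location.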
2155 
2156 /// LowerCallResult - Lower the result values of a call into the
2157 /// appropriate copies out of appropriate physical registers.
2158 SDValue ARMTargetLowering::LowerCallResult(
2159  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2160  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2161  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2162  SDValue ThisVal) const {
2163  // Assign locations to each value returned by this call.
2165  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2166  *DAG.getContext());
2167  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2168 
2169  // Copy all of the result registers out of their specified physreg.
2170  for (unsigned i = 0; i != RVLocs.size(); ++i) {
2171  CCValAssign VA = RVLocs[i];
2172 
2173  // Pass 'this' value directly from the argument to return value, to avoid
2174  // reg unit interference
2175  if (i == 0 && isThisReturn) {
2176  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2177  "unexpected return calling convention register assignment");
2178  InVals.push_back(ThisVal);
2179  continue;
2180  }
2181 
2182  SDValue Val;
2183  if (VA.needsCustom() &&
2184  (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2185  // Handle f64 or half of a v2f64.
2186  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2187  InFlag);
2188  Chain = Lo.getValue(1);
2189  InFlag = Lo.getValue(2);
2190  VA = RVLocs[++i]; // skip ahead to next loc
2191  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2192  InFlag);
2193  Chain = Hi.getValue(1);
2194  InFlag = Hi.getValue(2);
2195  if (!Subtarget->isLittle())
2196  std::swap (Lo, Hi);
2197  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2198 
2199  if (VA.getLocVT() == MVT::v2f64) {
2200  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2201  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2202  DAG.getConstant(0, dl, MVT::i32));
2203 
2204  VA = RVLocs[++i]; // skip ahead to next loc
2205  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2206  Chain = Lo.getValue(1);
2207  InFlag = Lo.getValue(2);
2208  VA = RVLocs[++i]; // skip ahead to next loc
2209  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2210  Chain = Hi.getValue(1);
2211  InFlag = Hi.getValue(2);
2212  if (!Subtarget->isLittle())
2213  std::swap (Lo, Hi);
2214  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2215  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2216  DAG.getConstant(1, dl, MVT::i32));
2217  }
2218  } else {
2219  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2220  InFlag);
2221  Chain = Val.getValue(1);
2222  InFlag = Val.getValue(2);
2223  }
2224 
2225  switch (VA.getLocInfo()) {
2226  default: llvm_unreachable("Unknown loc info!");
2227  case CCValAssign::Full: break;
2228  case CCValAssign::BCvt:
2229  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2230  break;
2231  }
2232 
2233  // f16 arguments have their size extended to 4 bytes and passed as if they
2234  // had been copied to the LSBs of a 32-bit register.
2235  // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2236  if (VA.needsCustom() &&
2237  (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2238  Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2239 
2240  InVals.push_back(Val);
2241  }
2242 
2243  return Chain;
2244 }
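// Editor's worked example (not in the original source): under the soft-float
// AAPCS an f64 call result is described by two i32 locations marked
// needsCustom(). The loop above emits glued CopyFromReg nodes for the two
// GPRs (typically R0 and R1), swaps the halves on big-endian subtargets, and
// rebuilds the double with ARMISD::VMOVDRR. A v2f64 result repeats this for
// the next register pair and assembles the vector with INSERT_VECTOR_ELT.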
2245 
2246 std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2247  const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
2248  bool IsTailCall, int SPDiff) const {
2249  SDValue DstAddr;
2250  MachinePointerInfo DstInfo;
2251  int32_t Offset = VA.getLocMemOffset();
2252  MachineFunction &MF = DAG.getMachineFunction();
2253 
2254  if (IsTailCall) {
2255  Offset += SPDiff;
2256  auto PtrVT = getPointerTy(DAG.getDataLayout());
2257  int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2258  int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2259  DstAddr = DAG.getFrameIndex(FI, PtrVT);
2260  DstInfo =
2262  } else {
2263  SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
2264  DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2265  StackPtr, PtrOff);
2266  DstInfo =
2268  }
2269 
2270  return std::make_pair(DstAddr, DstInfo);
2271 }
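// Editor's note (illustrative): for an ordinary call the outgoing stack
// argument is addressed as SP + LocMemOffset within the outgoing-argument
// area. For a tail call the value must instead land in the caller's own
// incoming-argument area, so a fixed FrameIndex is created at
// LocMemOffset + SPDiff and the store targets that fixed stack slot.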
2272 
2273 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2274  SDValue Chain, SDValue &Arg,
2275  RegsToPassVector &RegsToPass,
2276  CCValAssign &VA, CCValAssign &NextVA,
2277  SDValue &StackPtr,
2278  SmallVectorImpl<SDValue> &MemOpChains,
2279  bool IsTailCall,
2280  int SPDiff) const {
2281  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2282  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2283  unsigned id = Subtarget->isLittle() ? 0 : 1;
2284  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2285 
2286  if (NextVA.isRegLoc())
2287  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2288  else {
2289  assert(NextVA.isMemLoc());
2290  if (!StackPtr.getNode())
2291  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2292  getPointerTy(DAG.getDataLayout()));
2293 
2294  SDValue DstAddr;
2295  MachinePointerInfo DstInfo;
2296  std::tie(DstAddr, DstInfo) =
2297  computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2298  MemOpChains.push_back(
2299  DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2300  }
2301 }
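// Editor's worked example (not in the original source): when an f64 is passed
// in GPRs, ARMISD::VMOVRRD cracks it into two i32 halves. On a little-endian
// subtarget the low word goes to the first assigned register (say R0) and the
// high word to the next (R1); big-endian swaps them. If only one GPR remains,
// the leftover half is stored to the stack slot computed by
// computeAddrForCallArg.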
2302 
2303 static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2304  return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2305  CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
2306 }
2307 
2308 /// LowerCall - Lowering a call into a callseq_start <-
2309 /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
2310 /// nodes.
2311 SDValue
2312 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2313  SmallVectorImpl<SDValue> &InVals) const {
2314  SelectionDAG &DAG = CLI.DAG;
2315  SDLoc &dl = CLI.DL;
2317  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2319  SDValue Chain = CLI.Chain;
2320  SDValue Callee = CLI.Callee;
2321  bool &isTailCall = CLI.IsTailCall;
2322  CallingConv::ID CallConv = CLI.CallConv;
2323  bool doesNotRet = CLI.DoesNotReturn;
2324  bool isVarArg = CLI.IsVarArg;
2325 
2326  MachineFunction &MF = DAG.getMachineFunction();
2329  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2330  bool isThisReturn = false;
2331  bool isCmseNSCall = false;
2332  bool isSibCall = false;
2333  bool PreferIndirect = false;
2334  bool GuardWithBTI = false;
2335 
2336  // Lower 'returns_twice' calls to a pseudo-instruction.
2337  if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2338  !Subtarget->noBTIAtReturnTwice())
2339  GuardWithBTI = AFI->branchTargetEnforcement();
2340 
2341  // Determine whether this is a non-secure function call.
2342  if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2343  isCmseNSCall = true;
2344 
2345  // Disable tail calls if they're not supported.
2346  if (!Subtarget->supportsTailCall())
2347  isTailCall = false;
2348 
2349  // For both the non-secure calls and the returns from a CMSE entry function,
2350  // the function needs to do some extra work after the call, or before the
2351  // return, respectively, thus it cannot end with a tail call
2352  if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2353  isTailCall = false;
2354 
2355  if (isa<GlobalAddressSDNode>(Callee)) {
2356  // If we're optimizing for minimum size and the function is called three or
2357  // more times in this block, we can improve codesize by calling indirectly
2358  // as BLXr has a 16-bit encoding.
2359  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2360  if (CLI.CB) {
2361  auto *BB = CLI.CB->getParent();
2362  PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2363  count_if(GV->users(), [&BB](const User *U) {
2364  return isa<Instruction>(U) &&
2365  cast<Instruction>(U)->getParent() == BB;
2366  }) > 2;
2367  }
2368  }
2369  if (isTailCall) {
2370  // Check if it's really possible to do a tail call.
2371  isTailCall = IsEligibleForTailCallOptimization(
2372  Callee, CallConv, isVarArg, isStructRet,
2373  MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2374  PreferIndirect);
2375 
2376  if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2377  CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2378  isSibCall = true;
2379 
2380  // We don't support GuaranteedTailCallOpt for ARM, only automatically
2381  // detected sibcalls.
2382  if (isTailCall)
2383  ++NumTailCalls;
2384  }
2385 
2386  if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2387  report_fatal_error("failed to perform tail call elimination on a call "
2388  "site marked musttail");
2389  // Analyze operands of the call, assigning locations to each operand.
2391  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2392  *DAG.getContext());
2393  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2394 
2395  // Get a count of how many bytes are to be pushed on the stack.
2396  unsigned NumBytes = CCInfo.getNextStackOffset();
2397 
2398  // SPDiff is the byte offset of the call's argument area from the callee's.
2399  // Stores to callee stack arguments will be placed in FixedStackSlots offset
2400  // by this amount for a tail call. In a sibling call it must be 0 because the
2401  // caller will deallocate the entire stack and the callee still expects its
2402  // arguments to begin at SP+0. Completely unused for non-tail calls.
2403  int SPDiff = 0;
2404 
2405  if (isTailCall && !isSibCall) {
2406  auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2407  unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2408 
2409  // Since callee will pop argument stack as a tail call, we must keep the
2410  // popped size 16-byte aligned.
2412  NumBytes = alignTo(NumBytes, StackAlign);
2413 
2414  // SPDiff will be negative if this tail call requires more space than we
2415  // would automatically have in our incoming argument space. Positive if we
2416  // can actually shrink the stack.
2417  SPDiff = NumReusableBytes - NumBytes;
2418 
2419  // If this call requires more stack than we have available from
2420  // LowerFormalArguments, tell FrameLowering to reserve space for it.
2421  if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2422  AFI->setArgRegsSaveSize(-SPDiff);
2423  }
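  // Editor's worked example (not in the original source): if the caller
  // received 8 bytes of incoming stack arguments (NumReusableBytes = 8) and
  // this tail call needs 16 bytes of outgoing arguments after 16-byte
  // alignment, then SPDiff = 8 - 16 = -8. The call needs 8 bytes more than
  // the incoming argument area provides, so ArgRegsSaveSize is raised to 8
  // and frame lowering reserves the extra space. A positive SPDiff means the
  // outgoing arguments fit inside the existing area.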
2424 
2425  if (isSibCall) {
2426  // For sibling tail calls, memory operands are available in our caller's stack.
2427  NumBytes = 0;
2428  } else {
2429  // Adjust the stack pointer for the new arguments...
2430  // These operations are automatically eliminated by the prolog/epilog pass
2431  Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2432  }
2433 
2434  SDValue StackPtr =
2435  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2436 
2437  RegsToPassVector RegsToPass;
2438  SmallVector<SDValue, 8> MemOpChains;
2439 
2440  // During a tail call, stores to the argument area must happen after all of
2441  // the function's incoming arguments have been loaded because they may alias.
2442  // This is done by folding in a TokenFactor from LowerFormalArguments, but
2443  // there's no point in doing so repeatedly so this tracks whether that's
2444  // happened yet.
2445  bool AfterFormalArgLoads = false;
2446 
2447  // Walk the register/memloc assignments, inserting copies/loads. In the case
2448  // of tail call optimization, arguments are handled later.
2449  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2450  i != e;
2451  ++i, ++realArgIdx) {
2452  CCValAssign &VA = ArgLocs[i];
2453  SDValue Arg = OutVals[realArgIdx];
2454  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2455  bool isByVal = Flags.isByVal();
2456 
2457  // Promote the value if needed.
2458  switch (VA.getLocInfo()) {
2459  default: llvm_unreachable("Unknown loc info!");
2460  case CCValAssign::Full: break;
2461  case CCValAssign::SExt:
2462  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2463  break;
2464  case CCValAssign::ZExt:
2465  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2466  break;
2467  case CCValAssign::AExt:
2468  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2469  break;
2470  case CCValAssign::BCvt:
2471  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2472  break;
2473  }
2474 
2475  if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2476  Chain = DAG.getStackArgumentTokenFactor(Chain);
2477  AfterFormalArgLoads = true;
2478  }
2479 
2480  // f16 arguments have their size extended to 4 bytes and passed as if they
2481  // had been copied to the LSBs of a 32-bit register.
2482  // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2483  if (VA.needsCustom() &&
2484  (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2485  Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2486  } else {
2487  // f16 arguments could have been extended prior to argument lowering.
2488  // Mask such arguments if this is a CMSE nonsecure call.
2489  auto ArgVT = Outs[realArgIdx].ArgVT;
2490  if (isCmseNSCall && (ArgVT == MVT::f16)) {
2491  auto LocBits = VA.getLocVT().getSizeInBits();
2492  auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2493  SDValue Mask =
2494  DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2495  Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2496  Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2497  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2498  }
2499  }
2500 
2501  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2502  if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2504  DAG.getConstant(0, dl, MVT::i32));
2506  DAG.getConstant(1, dl, MVT::i32));
2507 
2508  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2509  StackPtr, MemOpChains, isTailCall, SPDiff);
2510 
2511  VA = ArgLocs[++i]; // skip ahead to next loc
2512  if (VA.isRegLoc()) {
2513  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2514  StackPtr, MemOpChains, isTailCall, SPDiff);
2515  } else {
2516  assert(VA.isMemLoc());
2517  SDValue DstAddr;
2518  MachinePointerInfo DstInfo;
2519  std::tie(DstAddr, DstInfo) =
2520  computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2521  MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2522  }
2523  } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2524  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2525  StackPtr, MemOpChains, isTailCall, SPDiff);
2526  } else if (VA.isRegLoc()) {
2527  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2528  Outs[0].VT == MVT::i32) {
2529  assert(VA.getLocVT() == MVT::i32 &&
2530  "unexpected calling convention register assignment");
2531  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2532  "unexpected use of 'returned'");
2533  isThisReturn = true;
2534  }
2535  const TargetOptions &Options = DAG.getTarget().Options;
2536  if (Options.EmitCallSiteInfo)
2537  CSInfo.emplace_back(VA.getLocReg(), i);
2538  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2539  } else if (isByVal) {
2540  assert(VA.isMemLoc());
2541  unsigned offset = 0;
2542 
2543  // True if this byval aggregate will be split between registers
2544  // and memory.
2545  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2546  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2547 
2548  if (CurByValIdx < ByValArgsCount) {
2549 
2550  unsigned RegBegin, RegEnd;
2551  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2552 
2553  EVT PtrVT =
2555  unsigned int i, j;
2556  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2557  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2558  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2559  SDValue Load =
2560  DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2561  DAG.InferPtrAlign(AddArg));
2562  MemOpChains.push_back(Load.getValue(1));
2563  RegsToPass.push_back(std::make_pair(j, Load));
2564  }
2565 
2566  // If the parameter size exceeds the register area, the "offset" value
2567  // helps us calculate the stack slot for the remaining part properly.
2568  offset = RegEnd - RegBegin;
2569 
2570  CCInfo.nextInRegsParam();
2571  }
2572 
2573  if (Flags.getByValSize() > 4*offset) {
2574  auto PtrVT = getPointerTy(DAG.getDataLayout());
2575  SDValue Dst;
2576  MachinePointerInfo DstInfo;
2577  std::tie(Dst, DstInfo) =
2578  computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2579  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2580  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2581  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2582  MVT::i32);
2583  SDValue AlignNode =
2584  DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2585 
2586  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2587  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2588  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2589  Ops));
2590  }
2591  } else {
2592  assert(VA.isMemLoc());
2593  SDValue DstAddr;
2594  MachinePointerInfo DstInfo;
2595  std::tie(DstAddr, DstInfo) =
2596  computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2597 
2598  SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2599  MemOpChains.push_back(Store);
2600  }
2601  }
2602 
2603  if (!MemOpChains.empty())
2604  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2605 
2606  // Build a sequence of copy-to-reg nodes chained together with token chain
2607  // and flag operands which copy the outgoing args into the appropriate regs.
2608  SDValue InFlag;
2609  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2610  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2611  RegsToPass[i].second, InFlag);
2612  InFlag = Chain.getValue(1);
2613  }
2614 
2615  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2616  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2617  // node so that legalize doesn't hack it.
2618  bool isDirect = false;
2619 
2620  const TargetMachine &TM = getTargetMachine();
2621  const Module *Mod = MF.getFunction().getParent();
2622  const GlobalValue *GV = nullptr;
2623  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2624  GV = G->getGlobal();
2625  bool isStub =
2626  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2627 
2628  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2629  bool isLocalARMFunc = false;
2630  auto PtrVt = getPointerTy(DAG.getDataLayout());
2631 
2632  if (Subtarget->genLongCalls()) {
2633  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2634  "long-calls codegen is not position independent!");
2635  // Handle a global address or an external symbol. If it's not one of
2636  // those, the target's already in a register, so we don't need to do
2637  // anything extra.
2638  if (isa<GlobalAddressSDNode>(Callee)) {
2639  // Create a constant pool entry for the callee address
2640  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2641  ARMConstantPoolValue *CPV =
2642  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2643 
2644  // Get the address of the callee into a register
2645  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2646  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2647  Callee = DAG.getLoad(
2648  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2650  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2651  const char *Sym = S->getSymbol();
2652 
2653  // Create a constant pool entry for the callee address
2654  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2655  ARMConstantPoolValue *CPV =
2657  ARMPCLabelIndex, 0);
2658  // Get the address of the callee into a register
2659  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2660  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2661  Callee = DAG.getLoad(
2662  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2664  }
2665  } else if (isa<GlobalAddressSDNode>(Callee)) {
2666  if (!PreferIndirect) {
2667  isDirect = true;
2668  bool isDef = GV->isStrongDefinitionForLinker();
2669 
2670  // ARM call to a local ARM function is predicable.
2671  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2672  // tBX takes a register source operand.
2673  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2674  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2675  Callee = DAG.getNode(
2676  ARMISD::WrapperPIC, dl, PtrVt,
2677  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2678  Callee = DAG.getLoad(
2679  PtrVt, dl, DAG.getEntryNode(), Callee,
2683  } else if (Subtarget->isTargetCOFF()) {
2684  assert(Subtarget->isTargetWindows() &&
2685  "Windows is the only supported COFF target");
2686  unsigned TargetFlags = ARMII::MO_NO_FLAG;
2687  if (GV->hasDLLImportStorageClass())
2688  TargetFlags = ARMII::MO_DLLIMPORT;
2689  else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
2690  TargetFlags = ARMII::MO_COFFSTUB;
2691  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
2692  TargetFlags);
2693  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2694  Callee =
2695  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2696  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2698  } else {
2699  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2700  }
2701  }
2702  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2703  isDirect = true;
2704  // tBX takes a register source operand.
2705  const char *Sym = S->getSymbol();
2706  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2707  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2708  ARMConstantPoolValue *CPV =
2710  ARMPCLabelIndex, 4);
2711  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2712  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2713  Callee = DAG.getLoad(
2714  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2716  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2717  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2718  } else {
2719  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2720  }
2721  }
2722 
2723  if (isCmseNSCall) {
2724  assert(!isARMFunc && !isDirect &&
2725  "Cannot handle call to ARM function or direct call");
2726  if (NumBytes > 0) {
2728  "call to non-secure function would "
2729  "require passing arguments on stack",
2730  dl.getDebugLoc());
2731  DAG.getContext()->diagnose(Diag);
2732  }
2733  if (isStructRet) {
2736  "call to non-secure function would return value through pointer",
2737  dl.getDebugLoc());
2738  DAG.getContext()->diagnose(Diag);
2739  }
2740  }
2741 
2742  // FIXME: handle tail calls differently.
2743  unsigned CallOpc;
2744  if (Subtarget->isThumb()) {
2745  if (GuardWithBTI)
2746  CallOpc = ARMISD::t2CALL_BTI;
2747  else if (isCmseNSCall)
2748  CallOpc = ARMISD::tSECALL;
2749  else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2750  CallOpc = ARMISD::CALL_NOLINK;
2751  else
2752  CallOpc = ARMISD::CALL;
2753  } else {
2754  if (!isDirect && !Subtarget->hasV5TOps())
2755  CallOpc = ARMISD::CALL_NOLINK;
2756  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2757  // Emit regular call when code size is the priority
2758  !Subtarget->hasMinSize())
2759  // "mov lr, pc; b _foo" to avoid confusing the RSP
2760  CallOpc = ARMISD::CALL_NOLINK;
2761  else
2762  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2763  }
2764 
2765  // We don't usually want to end the call-sequence here because we would tidy
2766  // the frame up *after* the call, however in the ABI-changing tail-call case
2767  // we've carefully laid out the parameters so that when sp is reset they'll be
2768  // in the correct location.
2769  if (isTailCall && !isSibCall) {
2770  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
2771  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2772  InFlag = Chain.getValue(1);
2773  }
2774 
2775  std::vector<SDValue> Ops;
2776  Ops.push_back(Chain);
2777  Ops.push_back(Callee);
2778 
2779  if (isTailCall) {
2780  Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32));
2781  }
2782 
2783  // Add argument registers to the end of the list so that they are known live
2784  // into the call.
2785  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2786  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2787  RegsToPass[i].second.getValueType()));
2788 
2789  // Add a register mask operand representing the call-preserved registers.
2790  const uint32_t *Mask;
2791  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2792  if (isThisReturn) {
2793  // For 'this' returns, use the R0-preserving mask if applicable
2794  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2795  if (!Mask) {
2796  // Set isThisReturn to false if the calling convention is not one that
2797  // allows 'returned' to be modeled in this way, so LowerCallResult does
2798  // not try to pass 'this' straight through
2799  isThisReturn = false;
2800  Mask = ARI->getCallPreservedMask(MF, CallConv);
2801  }
2802  } else
2803  Mask = ARI->getCallPreservedMask(MF, CallConv);
2804 
2805  assert(Mask && "Missing call preserved mask for calling convention");
2806  Ops.push_back(DAG.getRegisterMask(Mask));
2807 
2808  if (InFlag.getNode())
2809  Ops.push_back(InFlag);
2810 
2811  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2812  if (isTailCall) {
2814  SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2815  DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2816  return Ret;
2817  }
2818 
2819  // Returns a chain and a flag for retval copy to use.
2820  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2821  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2822  InFlag = Chain.getValue(1);
2823  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2824 
2825  // If we're guaranteeing tail-calls will be honoured, the callee must
2826  // pop its own argument stack on return. But this call is *not* a tail call so
2827  // we need to undo that after it returns to restore the status-quo.
2828  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
2829  uint64_t CalleePopBytes =
2830  canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;
2831 
2832  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2833  DAG.getIntPtrConstant(CalleePopBytes, dl, true),
2834  InFlag, dl);
2835  if (!Ins.empty())
2836  InFlag = Chain.getValue(1);
2837 
2838  // Handle result values, copying them out of physregs into vregs that we
2839  // return.
2840  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2841  InVals, isThisReturn,
2842  isThisReturn ? OutVals[0] : SDValue());
2843 }
2844 
2845 /// HandleByVal - Every parameter *after* a byval parameter is passed
2846 /// on the stack. Remember the next parameter register to allocate,
2847 /// and then confiscate the rest of the parameter registers to ensure
2848 /// this.
2849 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2850  Align Alignment) const {
2851  // Byval (as with any stack) slots are always at least 4 byte aligned.
2852  Alignment = std::max(Alignment, Align(4));
2853 
2854  unsigned Reg = State->AllocateReg(GPRArgRegs);
2855  if (!Reg)
2856  return;
2857 
2858  unsigned AlignInRegs = Alignment.value() / 4;
2859  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2860  for (unsigned i = 0; i < Waste; ++i)
2861  Reg = State->AllocateReg(GPRArgRegs);
2862 
2863  if (!Reg)
2864  return;
2865 
2866  unsigned Excess = 4 * (ARM::R4 - Reg);
2867 
2868  // Special case when NSAA != SP and the parameter size is greater than the
2869  // size of all remaining GPR regs. In that case we can't split the parameter,
2870  // we must send it to the stack. We also must set the NCRN to R4, so we
2871  // waste all remaining registers.
2872  const unsigned NSAAOffset = State->getNextStackOffset();
2873  if (NSAAOffset != 0 && Size > Excess) {
2874  while (State->AllocateReg(GPRArgRegs))
2875  ;
2876  return;
2877  }
2878 
2879  // The first register for the byval parameter is the first register that
2880  // wasn't allocated before this method call, so it would be "reg".
2881  // If the parameter is small enough to be saved in the range [reg, r4), then
2882  // the end (first-after-last) register would be reg + param-size-in-regs;
2883  // otherwise the parameter is split between registers and the stack, and
2884  // the end register is r4 in that case.
2885  unsigned ByValRegBegin = Reg;
2886  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2887  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2888  // Note, the first register is already allocated at the beginning of this
2889  // function; allocate the remaining number of registers we need.
2890  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2891  State->AllocateReg(GPRArgRegs);
2892  // A byval parameter that is split between registers and memory needs its
2893  // size truncated here.
2894  // In the case where the entire structure fits in registers, we set the
2895  // size in memory to zero.
2896  Size = std::max<int>(Size - Excess, 0);
2897 }
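// Editor's worked example (not in the original source): a 12-byte byval
// argument with 8-byte alignment arriving when R0 is already taken first
// allocates R1. AlignInRegs is 2, so Waste = (R4 - R1) % 2 = 1 and R1 is
// skipped, leaving Reg = R2 and Excess = 4 * (R4 - R2) = 8 bytes of register
// space. With NSAAOffset == 0 the byval occupies [R2, R4): R3 is allocated as
// well, and Size is reduced to 12 - 8 = 4, so the final word of the aggregate
// is passed on the stack.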
2898 
2899 /// MatchingStackOffset - Return true if the given stack call argument is
2900 /// already available in the same position (relatively) of the caller's
2901 /// incoming argument stack.
2902 static
2903 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2905  const TargetInstrInfo *TII) {
2906  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2907  int FI = std::numeric_limits<int>::max();
2908  if (Arg.getOpcode() == ISD::CopyFromReg) {
2909  Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2910  if (!Register::isVirtualRegister(VR))
2911  return false;
2912  MachineInstr *Def = MRI->getVRegDef(VR);
2913  if (!Def)
2914  return false;
2915  if (!Flags.isByVal()) {
2916  if (!TII->isLoadFromStackSlot(*Def, FI))
2917  return false;
2918  } else {
2919  return false;
2920  }
2921  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2922  if (Flags.isByVal())
2923  // ByVal argument is passed in as a pointer but it's now being
2924  // dereferenced. e.g.
2925  // define @foo(%struct.X* %A) {
2926  // tail call @bar(%struct.X* byval %A)
2927  // }
2928  return false;
2929  SDValue Ptr = Ld->getBasePtr();
2930  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2931  if (!FINode)
2932  return false;
2933  FI = FINode->getIndex();
2934  } else
2935  return false;
2936 
2938  if (!MFI.isFixedObjectIndex(FI))
2939  return false;
2940  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2941 }
2942 
2943 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2944 /// for tail call optimization. Targets which want to do tail call
2945 /// optimization should implement this function.
2946 bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2947  SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2948  bool isCalleeStructRet, bool isCallerStructRet,
2949  const SmallVectorImpl<ISD::OutputArg> &Outs,
2950  const SmallVectorImpl<SDValue> &OutVals,
2952  const bool isIndirect) const {
2953  MachineFunction &MF = DAG.getMachineFunction();
2954  const Function &CallerF = MF.getFunction();
2955  CallingConv::ID CallerCC = CallerF.getCallingConv();
2956 
2957  assert(Subtarget->supportsTailCall());
2958 
2959  // Indirect tail calls cannot be optimized for Thumb1 if the args
2960  // to the call take up r0-r3. The reason is that there are no legal registers
2961  // left to hold the pointer to the function to be called.
2962  // Similarly, if the function uses return address sign and authentication,
2963  // r12 is needed to hold the PAC and is not available to hold the callee
2964  // address.
2965  if (Outs.size() >= 4 &&
2966  (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
2967  if (Subtarget->isThumb1Only())
2968  return false;
2969  // Conservatively assume the function spills LR.
2971  return false;
2972  }
2973 
2974  // Look for obvious safe cases to perform tail call optimization that do not
2975  // require ABI changes. This is what gcc calls sibcall.
2976 
2977  // Exception-handling functions need a special set of instructions to indicate
2978  // a return to the hardware. Tail-calling another function would probably
2979  // break this.
2980  if (CallerF.hasFnAttribute("interrupt"))
2981  return false;
2982 
2983  if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
2984  return CalleeCC == CallerCC;
2985 
2986  // Also avoid sibcall optimization if either caller or callee uses struct
2987  // return semantics.
2988  if (isCalleeStructRet || isCallerStructRet)
2989  return false;
2990 
2991  // Externally-defined functions with weak linkage should not be
2992  // tail-called on ARM when the OS does not support dynamic
2993  // pre-emption of symbols, as the AAELF spec requires normal calls
2994  // to undefined weak functions to be replaced with a NOP or jump to the
2995  // next instruction. The behaviour of branch instructions in this
2996  // situation (as used for tail calls) is implementation-defined, so we
2997  // cannot rely on the linker replacing the tail call with a return.
2998  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2999  const GlobalValue *GV = G->getGlobal();
3001  if (GV->hasExternalWeakLinkage() &&
3002  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3003  return false;
3004  }
3005 
3006  // Check that the call results are passed in the same way.
3007  LLVMContext &C = *DAG.getContext();
3009  getEffectiveCallingConv(CalleeCC, isVarArg),
3010  getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
3011  CCAssignFnForReturn(CalleeCC, isVarArg),
3012  CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
3013  return false;
3014  // The callee has to preserve all registers the caller needs to preserve.
3015  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3016  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3017  if (CalleeCC != CallerCC) {
3018  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3019  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3020  return false;
3021  }
3022 
3023  // If Caller's vararg or byval argument has been split between registers and
3024  // stack, do not perform tail call, since part of the argument is in caller's
3025  // local frame.
3026  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3027  if (AFI_Caller->getArgRegsSaveSize())
3028  return false;
3029 
3030  // If the callee takes no arguments then go on to check the results of the
3031  // call.
3032  if (!Outs.empty()) {
3033  // Check if stack adjustment is needed. For now, do not do this if any
3034  // argument is passed on the stack.
3036  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3037  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3038  if (CCInfo.getNextStackOffset()) {
3039  // Check if the arguments are already laid out in the right way as
3040  // the caller's fixed stack objects.
3041  MachineFrameInfo &MFI = MF.getFrameInfo();
3042  const MachineRegisterInfo *MRI = &MF.getRegInfo();
3043  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3044  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
3045  i != e;
3046  ++i, ++realArgIdx) {
3047  CCValAssign &VA = ArgLocs[i];
3048  EVT RegVT = VA.getLocVT();
3049  SDValue Arg = OutVals[realArgIdx];
3050  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3051  if (VA.getLocInfo() == CCValAssign::Indirect)
3052  return false;
3053  if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
3054  // f64 and vector types are split into multiple registers or
3055  // register/stack-slot combinations. The types will not match
3056  // the registers; give up on memory f64 refs until we figure
3057  // out what to do about this.
3058  if (!VA.isRegLoc())
3059  return false;
3060  if (!ArgLocs[++i].isRegLoc())
3061  return false;
3062  if (RegVT == MVT::v2f64) {
3063  if (!ArgLocs[++i].isRegLoc())
3064  return false;
3065  if (!ArgLocs[++i].isRegLoc())
3066  return false;
3067  }
3068  } else if (!VA.isRegLoc()) {
3069  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
3070  MFI, MRI, TII))
3071  return false;
3072  }
3073  }
3074  }
3075 
3076  const MachineRegisterInfo &MRI = MF.getRegInfo();
3077  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3078  return false;
3079  }
3080 
3081  return true;
3082 }
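// Editor's example (illustrative, not in the original source): a call such as
//
//   int callee(int a, int b);
//   int caller(int a, int b) { return callee(a, b); }
//
// passes everything in R0/R1, needs no outgoing stack, and the callee
// preserves every register the caller relies on, so it is accepted as a
// sibcall. A caller whose own varargs or byval arguments were split between
// registers and the stack (non-zero ArgRegsSaveSize), or one carrying the
// "interrupt" attribute, is rejected by the checks above.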
3083 
3084 bool
3085 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3086  MachineFunction &MF, bool isVarArg,
3087  const SmallVectorImpl<ISD::OutputArg> &Outs,
3088  LLVMContext &Context) const {
3090  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3091  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3092 }
3093 
3095  const SDLoc &DL, SelectionDAG &DAG) {
3096  const MachineFunction &MF = DAG.getMachineFunction();
3097  const Function &F = MF.getFunction();
3098 
3099  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3100 
3101  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3102  // version of the "preferred return address". These offsets affect the return
3103  // instruction if this is a return from PL1 without hypervisor extensions.
3104  // IRQ/FIQ: +4 "subs pc, lr, #4"
3105  // SWI: 0 "subs pc, lr, #0"
3106  // ABORT: +4 "subs pc, lr, #4"
3107  // UNDEF: +4/+2 "subs pc, lr, #0"
3108  // UNDEF varies depending on whether the exception came from ARM or Thumb
3109  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
3110 
3111  int64_t LROffset;
3112  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3113  IntKind == "ABORT")
3114  LROffset = 4;
3115  else if (IntKind == "SWI" || IntKind == "UNDEF")
3116  LROffset = 0;
3117  else
3118  report_fatal_error("Unsupported interrupt attribute. If present, value "
3119  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3120 
3121  RetOps.insert(RetOps.begin() + 1,
3122  DAG.getConstant(LROffset, DL, MVT::i32, false));
3123 
3124  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
3125 }
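// Editor's example (not in the original source): a handler declared as
//
//   __attribute__((interrupt("IRQ"))) void isr(void) { /* ... */ }
//
// returns with "subs pc, lr, #4" (LROffset = 4), while an "SWI" or "UNDEF"
// handler returns with "subs pc, lr, #0".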
3126 
3127 SDValue
3128 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3129  bool isVarArg,
3130  const SmallVectorImpl<ISD::OutputArg> &Outs,
3131  const SmallVectorImpl<SDValue> &OutVals,
3132  const SDLoc &dl, SelectionDAG &DAG) const {
3133  // CCValAssign - represent the assignment of the return value to a location.
3135 
3136  // CCState - Info about the registers and stack slots.
3137  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3138  *DAG.getContext());
3139 
3140  // Analyze outgoing return values.
3141  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3142 
3143  SDValue Flag;
3144  SmallVector<SDValue, 4> RetOps;
3145  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3146  bool isLittleEndian = Subtarget->isLittle();
3147 
3148  MachineFunction &MF = DAG.getMachineFunction();
3150  AFI->setReturnRegsCount(RVLocs.size());
3151 
3152  // Report error if cmse entry function returns structure through first ptr arg.
3153  if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3154  // Note: using an empty SDLoc(), as the first line of the function is a
3155  // better place to report than the last line.
3158  "secure entry function would return value through pointer",
3159  SDLoc().getDebugLoc());
3160  DAG.getContext()->diagnose(Diag);
3161  }
3162 
3163  // Copy the result values into the output registers.
3164  for (unsigned i = 0, realRVLocIdx = 0;
3165  i != RVLocs.size();
3166  ++i, ++realRVLocIdx) {
3167  CCValAssign &VA = RVLocs[i];
3168  assert(VA.isRegLoc() && "Can only return in registers!");
3169 
3170  SDValue Arg = OutVals[realRVLocIdx];
3171  bool ReturnF16 = false;
3172 
3173  if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3174  // Half-precision return values can be returned like this:
3175  //
3176  // t11 f16 = fadd ...
3177  // t12: i16 = bitcast t11
3178  // t13: i32 = zero_extend t12
3179  // t14: f32 = bitcast t13 <~~~~~~~ Arg
3180  //
3181  // to avoid code generation for bitcasts, we simply set Arg to the node
3182  // that produces the f16 value, t11 in this case.
3183  //
3184  if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3185  SDValue ZE = Arg.getOperand(0);
3186  if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3187  SDValue BC = ZE.getOperand(0);
3188  if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3189  Arg = BC.getOperand(0);
3190  ReturnF16 = true;
3191  }
3192  }
3193  }
3194  }
3195 
3196  switch (VA.getLocInfo()) {
3197  default: llvm_unreachable("Unknown loc info!");
3198  case CCValAssign::Full: break;
3199  case CCValAssign::BCvt:
3200  if (!ReturnF16)
3201  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3202  break;
3203  }
3204 
3205  // Mask f16 arguments if this is a CMSE nonsecure entry.
3206  auto RetVT = Outs[realRVLocIdx].ArgVT;
3207  if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3208  if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3209  Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3210  } else {
3211  auto LocBits = VA.getLocVT().getSizeInBits();
3212  auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3213  SDValue Mask =
3214  DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3215  Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3216  Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3217  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3218  }
3219  }
3220 
3221  if (VA.needsCustom() &&
3222  (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3223  if (VA.getLocVT() == MVT::v2f64) {
3224  // Extract the first half and return it in two registers.
3226  DAG.getConstant(0, dl, MVT::i32));
3227  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3228  DAG.getVTList(MVT::i32, MVT::i32), Half);
3229 
3230  Chain =
3231  DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3232  HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag);
3233  Flag = Chain.getValue(1);
3234  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3235  VA = RVLocs[++i]; // skip ahead to next loc
3236  Chain =
3237  DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3238  HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag);
3239  Flag = Chain.getValue(1);
3240  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3241  VA = RVLocs[++i]; // skip ahead to next loc
3242 
3243  // Extract the 2nd half and fall through to handle it as an f64 value.
3245  DAG.getConstant(1, dl, MVT::i32));
3246  }
3247  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3248  // available.
3249  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3250  DAG.getVTList(MVT::i32, MVT::i32), Arg);
3251  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3252  fmrrd.getValue(isLittleEndian ? 0 : 1), Flag);
3253  Flag = Chain.getValue(1);
3254  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3255  VA = RVLocs[++i]; // skip ahead to next loc
3256  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3257  fmrrd.getValue(isLittleEndian ? 1 : 0), Flag);
3258  } else
3259  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
3260 
3261  // Guarantee that all emitted copies are
3262  // stuck together, avoiding something bad.
3263  Flag = Chain.getValue(1);
3264  RetOps.push_back(DAG.getRegister(
3265  VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3266  }
3267  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3268  const MCPhysReg *I =
3269  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3270  if (I) {
3271  for (; *I; ++I) {
3272  if (ARM::GPRRegClass.contains(*I))
3273  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3274  else if (ARM::DPRRegClass.contains(*I))
3275  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3276  else
3277  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3278  }
3279  }
3280 
3281  // Update chain and glue.
3282  RetOps[0] = Chain;
3283  if (Flag.getNode())
3284  RetOps.push_back(Flag);
3285 
3286  // CPUs which aren't M-class use a special sequence to return from
3287  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3288  // though we use "subs pc, lr, #N").
3289  //
3290  // M-class CPUs actually use a normal return sequence with a special
3291  // (hardware-provided) value in LR, so the normal code path works.
3292  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3293  !Subtarget->isMClass()) {
3294  if (Subtarget->isThumb1Only())
3295  report_fatal_error("interrupt attribute is not supported in Thumb1");
3296  return LowerInterruptReturn(RetOps, dl, DAG);
3297  }
3298 
3301  return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3302 }
3303 
3304 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3305  if (N->getNumValues() != 1)
3306  return false;
3307  if (!N->hasNUsesOfValue(1, 0))
3308  return false;
3309 
3310  SDValue TCChain = Chain;
3311  SDNode *Copy = *N->use_begin();
3312  if (Copy->getOpcode() == ISD::CopyToReg) {
3313  // If the copy has a glue operand, we conservatively assume it isn't safe to
3314  // perform a tail call.
3315  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3316  return false;
3317  TCChain = Copy->getOperand(0);
3318  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3319  SDNode *VMov = Copy;
3320  // f64 returned in a pair of GPRs.
3322  for (SDNode *U : VMov->uses()) {
3323  if (U->getOpcode() != ISD::CopyToReg)
3324  return false;
3325  Copies.insert(U);
3326  }
3327  if (Copies.size() > 2)
3328  return false;
3329 
3330  for (SDNode *U : VMov->uses()) {
3331  SDValue UseChain = U->getOperand(0);
3332  if (Copies.count(UseChain.getNode()))
3333  // Second CopyToReg
3334  Copy = U;
3335  else {
3336  // We are at the top of this chain.
3337  // If the copy has a glue operand, we conservatively assume it
3338  // isn't safe to perform a tail call.
3339  if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3340  return false;
3341  // First CopyToReg
3342  TCChain = UseChain;
3343  }
3344  }
3345  } else if (Copy->getOpcode() == ISD::BITCAST) {
3346  // f32 returned in a single GPR.
3347  if (!Copy->hasOneUse())
3348  return false;
3349  Copy = *Copy->use_begin();
3350  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3351  return false;
3352  // If the copy has a glue operand, we conservatively assume it isn't safe to
3353  // perform a tail call.
3354  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3355  return false;
3356  TCChain = Copy->getOperand(0);
3357  } else {
3358  return false;
3359  }
3360 
3361  bool HasRet = false;
3362  for (const SDNode *U : Copy->uses()) {
3363  if (U->getOpcode() != ARMISD::RET_FLAG &&
3364  U->getOpcode() != ARMISD::INTRET_FLAG)
3365  return false;
3366  HasRet = true;
3367  }
3368 
3369  if (!HasRet)
3370  return false;
3371 
3372  Chain = TCChain;
3373  return true;
3374 }
3375 
3376 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3377  if (!Subtarget->supportsTailCall())
3378  return false;
3379 
3380  if (!CI->isTailCall())
3381  return false;
3382 
3383  return true;
3384 }
3385 
3386 // Trying to write a 64-bit value, so we need to split it into two 32-bit
3387 // values first, and pass the low and high parts through.
3389  SDLoc DL(Op);
3390  SDValue WriteValue = Op->getOperand(2);
3391 
3392  // This function is only supposed to be called for i64 type argument.
3393  assert(WriteValue.getValueType() == MVT::i64
3394  && "LowerWRITE_REGISTER called for non-i64 type argument.");
3395 
3396  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3397  DAG.getConstant(0, DL, MVT::i32));
3398  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3399  DAG.getConstant(1, DL, MVT::i32));
3400  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3401  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3402 }
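// Editor's sketch (not in the original source; the register name metadata is
// hypothetical): IR such as
//
//   call void @llvm.write_register.i64(metadata !0, i64 %v)   ; !0 = !{!"regname"}
//
// reaches this hook as a WRITE_REGISTER node with an i64 value operand; it is
// rebuilt as a WRITE_REGISTER taking the two EXTRACT_ELEMENT halves (low word
// then high word) so the 32-bit register write can be selected.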
3403 
3404 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3405 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3406 // one of the above mentioned nodes. It has to be wrapped because otherwise
3407 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3408 // be used to form an addressing mode. These wrapped nodes will be selected
3409 // into MOVi.
3410 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3411  SelectionDAG &DAG) const {
3412  EVT PtrVT = Op.getValueType();
3413  // FIXME there is no actual debug info here
3414  SDLoc dl(Op);
3415  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3416  SDValue Res;
3417 
3418  // When generating execute-only code Constant Pools must be promoted to the
3419  // global data section. It's a bit ugly that we can't share them across basic
3420  // blocks, but this way we guarantee that execute-only behaves correctly with
3421  // position-independent addressing modes.
3422  if (Subtarget->genExecuteOnly()) {
3423  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3424  auto T = const_cast<Type*>(CP->getType());
3425  auto C = const_cast<Constant*>(CP->getConstVal());
3426  auto M = const_cast<Module*>(DAG.getMachineFunction().
3427  getFunction().getParent());
3428  auto GV = new GlobalVariable(
3429  *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3430  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
3431  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
3432  Twine(AFI->createPICLabelUId())
3433  );
3434  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3435  dl, PtrVT);
3436  return LowerGlobalAddress(GA, DAG);
3437  }
3438 
3439  if (CP->isMachineConstantPoolEntry())
3440  Res =
3441  DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3442  else
3443  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign());
3444  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3445 }
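// Editor's note (illustrative; the exact symbol name depends on the target's
// private-global prefix): with +execute-only a literal that would normally sit
// in an in-text constant pool is instead promoted to an internal
// GlobalVariable, named roughly "<prefix>CP<function number>_<uid>" (for
// example ".LCP2_0" on ELF), and then lowered via LowerGlobalAddress, which
// uses MOVW/MOVT-style addressing instead of a PC-relative literal load.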
3446 
3449 }
3450 
3451 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3452  SelectionDAG &DAG) const {
3453  MachineFunction &MF = DAG.getMachineFunction();
3455  unsigned ARMPCLabelIndex = 0;
3456  SDLoc DL(Op);
3457  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3458  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3459  SDValue CPAddr;
3460  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3461  if (!IsPositionIndependent) {
3462  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3463  } else {
3464  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3465  ARMPCLabelIndex = AFI->createPICLabelUId();
3466  ARMConstantPoolValue *CPV =
3467  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3468  ARMCP::CPBlockAddress, PCAdj);
3469  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3470  }
3471  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3472  SDValue Result = DAG.getLoad(
3473  PtrVT, DL, DAG.getEntryNode(), CPAddr,
3475  if (!IsPositionIndependent)
3476  return Result;
3477  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3478  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3479 }
3480 
3481 /// Convert a TLS address reference into the correct sequence of loads
3482 /// and calls to compute the variable's address for Darwin, and return an
3483 /// SDValue containing the final node.
3484 
3485 /// Darwin only has one TLS scheme which must be capable of dealing with the
3486 /// fully general situation, in the worst case. This means:
3487 /// + "extern __thread" declaration.
3488 /// + Defined in a possibly unknown dynamic library.
3489 ///
3490 /// The general system is that each __thread variable has a [3 x i32] descriptor
3491 /// which contains information used by the runtime to calculate the address. The
3492 /// only part of this the compiler needs to know about is the first word, which
3493 /// contains a function pointer that must be called with the address of the
3494 /// entire descriptor in "r0".
3495 ///
3496 /// Since this descriptor may be in a different unit, in general access must
3497 /// proceed along the usual ARM rules. A common sequence to produce is:
3498 ///
3499 /// movw rT1, :lower16:_var$non_lazy_ptr
3500 /// movt rT1, :upper16:_var$non_lazy_ptr
3501 /// ldr r0, [rT1]
3502 /// ldr rT2, [r0]
3503 /// blx rT2
3504 /// [...address now in r0...]
3505 SDValue
3506 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3507  SelectionDAG &DAG) const {
3508  assert(Subtarget->isTargetDarwin() &&
3509  "This function expects a Darwin target");
3510  SDLoc DL(Op);
3511 
3512  // First step is to get the address of the actual global symbol. This is where
3513  // the TLS descriptor lives.
3514  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3515 
3516  // The first entry in the descriptor is a function pointer that we must call
3517  // to obtain the address of the variable.
3518  SDValue Chain = DAG.getEntryNode();
3519  SDValue FuncTLVGet = DAG.getLoad(
3520  MVT::i32, DL, Chain, DescAddr,
3521  MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4),
3522  MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
3523  MachineMemOperand::MOInvariant);
3524  Chain = FuncTLVGet.getValue(1);
3525 
3526  MachineFunction &F = DAG.getMachineFunction();
3527  MachineFrameInfo &MFI = F.getFrameInfo();
3528  MFI.setAdjustsStack(true);
3529 
3530  // TLS calls preserve all registers except those that absolutely must be
3531  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3532  // silly).
3533  auto TRI =
3534  getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3535  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3536  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3537 
3538  // Finally, we can make the call. This is just a degenerate version of a
3539  // normal call node: r0 takes the address of the descriptor, and
3540  // returns the address of the variable in this thread.
3541  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3542  Chain =
3543      DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3544  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3545  DAG.getRegisterMask(Mask), Chain.getValue(1));
3546  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3547 }
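// For illustration only (hypothetical IR, not from the original source): a
// thread_local access such as
//
//   @var = external thread_local global i32
//   define i32 @get() {
//     %v = load i32, ptr @var
//     ret i32 %v
//   }
//
// reaches this lowering as an ISD::GlobalTLSAddress. The code above loads the
// first word of the variable's TLS descriptor (an accessor function) and emits
// ARMISD::CALL with a TLS-specific register mask, so only R0, LR and CPSR are
// treated as clobbered.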
3548 
3549 SDValue
3550 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3551  SelectionDAG &DAG) const {
3552  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3553 
3554  SDValue Chain = DAG.getEntryNode();
3555  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3556  SDLoc DL(Op);
3557 
3558  // Load the current TEB (thread environment block)
3559  SDValue Ops[] = {Chain,
3560  DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3561  DAG.getTargetConstant(15, DL, MVT::i32),
3562  DAG.getTargetConstant(0, DL, MVT::i32),
3563  DAG.getTargetConstant(13, DL, MVT::i32),
3564  DAG.getTargetConstant(0, DL, MVT::i32),
3565  DAG.getTargetConstant(2, DL, MVT::i32)};
3566  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3567  DAG.getVTList(MVT::i32, MVT::Other), Ops);
3568 
3569  SDValue TEB = CurrentTEB.getValue(0);
3570  Chain = CurrentTEB.getValue(1);
3571 
3572  // Load the ThreadLocalStoragePointer from the TEB
3573  // A pointer to the TLS array is located at offset 0x2c from the TEB.
3574  SDValue TLSArray =
3575  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3576  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3577 
3578  // The pointer to the thread's TLS data area is stored in the TLS array at
3579  // an offset of (TLS index * 4).
3580 
3581  // Load the TLS index from the C runtime
3582  SDValue TLSIndex =
3583  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3584  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3585  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3586 
3587  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3588  DAG.getConstant(2, DL, MVT::i32));
3589  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3590  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3591  MachinePointerInfo());
3592 
3593  // Get the offset of the start of the .tls section (section base)
3594  const auto *GA = cast<GlobalAddressSDNode>(Op);
3595  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3596  SDValue Offset = DAG.getLoad(
3597  PtrVT, DL, Chain,
3598  DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3599  DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3600  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3601 
3602  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3603 }
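// For illustration only (schematic, not part of the original source): the DAG
// built above computes the usual Windows on ARM TLS walk, conceptually
//
//   char **TlsArray = *(char ***)(TEB + 0x2c);  // ThreadLocalStoragePointer
//   char  *TlsBase  = TlsArray[_tls_index];     // this module's TLS block
//   addr            = TlsBase + SECREL(var);    // section-relative offset
//
// where _tls_index comes from the CRT and SECREL(var) is resolved through the
// ARMCP::SECREL constant pool entry created above.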
3604 
3605 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
3606 SDValue
3607 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3608  SelectionDAG &DAG) const {
3609  SDLoc dl(GA);
3610  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3611  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3612  MachineFunction &MF = DAG.getMachineFunction();
3613  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3614  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3615  ARMConstantPoolValue *CPV =
3616  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3617  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3618  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3619  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3620  Argument = DAG.getLoad(
3621      PtrVT, dl, DAG.getEntryNode(), Argument,
3622      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3623  SDValue Chain = Argument.getValue(1);
3624 
3625  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3626  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3627 
3628  // call __tls_get_addr.
3629  ArgListTy Args;
3630  ArgListEntry Entry;
3631  Entry.Node = Argument;
3632  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3633  Args.push_back(Entry);
3634 
3635  // FIXME: is there useful debug info available here?
3636  TargetLowering::CallLoweringInfo CLI(DAG);
3637  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3638      CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3639      DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3640 
3641  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3642  return CallResult.first;
3643 }
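// For illustration only (schematic assembly, not actual compiler output): the
// general-dynamic lowering above typically expands to
//
//   ldr   r0, .LCPI0_0        @ CP entry: var(TLSGD) - (LPC0_0 + PCAdj)
// .LPC0_0:
//   add   r0, pc, r0          @ ARMISD::PIC_ADD -> address of the GOT slot pair
//   bl    __tls_get_addr      @ returns the variable's address in r0
//
// i.e. r0 carries the (module, offset) GOT descriptor for the variable and the
// runtime call performs the actual resolution.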
3644 
3645 // Lower ISD::GlobalTLSAddress using the "initial exec" or
3646 // "local exec" model.
3647 SDValue
3648 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3649  SelectionDAG &DAG,
3650  TLSModel::Model model) const {
3651  const GlobalValue *GV = GA->getGlobal();
3652  SDLoc dl(GA);
3653  SDValue Offset;
3654  SDValue Chain = DAG.getEntryNode();
3655  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3656  // Get the Thread Pointer
3657  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3658 
3659  if (model == TLSModel::InitialExec) {
3660  MachineFunction &MF = DAG.getMachineFunction();
3661  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3662  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3663  // Initial exec model.
3664  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3665  ARMConstantPoolValue *CPV =
3666  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3667  ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3668  true);
3669  Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3670  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3671  Offset = DAG.getLoad(
3672  PtrVT, dl, Chain, Offset,
3673  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3674  Chain = Offset.getValue(1);
3675 
3676  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3677  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3678 
3679  Offset = DAG.getLoad(
3680  PtrVT, dl, Chain, Offset,
3681  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3682  } else {
3683  // local exec model
3684  assert(model == TLSModel::LocalExec);
3685  ARMConstantPoolValue *CPV =
3686      ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3687  Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3688  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3689  Offset = DAG.getLoad(
3690  PtrVT, dl, Chain, Offset,
3691  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3692  }
3693 
3694  // The address of the thread-local variable is the sum of the thread
3695  // pointer and the variable's offset.
3696  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3697 }
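// For illustration only (conceptual, not part of the original source): both
// exec models end with ThreadPointer + Offset. Roughly
//
//   addr = thread_pointer() + tpoff(var);            // local exec: link-time
//                                                    // constant from the CP
//   addr = thread_pointer() + *gottpoff_slot(var);   // initial exec: offset
//                                                    // loaded via the GOT
//
// which is why the initial-exec path above performs one extra load compared to
// the local-exec path.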
3698 
3699 SDValue
3700 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3701  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3702  if (DAG.getTarget().useEmulatedTLS())
3703  return LowerToTLSEmulatedModel(GA, DAG);
3704 
3705  if (Subtarget->isTargetDarwin())
3706  return LowerGlobalTLSAddressDarwin(Op, DAG);
3707 
3708  if (Subtarget->isTargetWindows())
3709  return LowerGlobalTLSAddressWindows(Op, DAG);
3710 
3711  // TODO: implement the "local dynamic" model
3712  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3713  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3714 
3715  switch (model) {
3716  case TLSModel::GeneralDynamic:
3717  case TLSModel::LocalDynamic:
3718  return LowerToTLSGeneralDynamicModel(GA, DAG);
3719  case TLSModel::InitialExec:
3720  case TLSModel::LocalExec:
3721  return LowerToTLSExecModels(GA, DAG, model);
3722  }
3723  llvm_unreachable("bogus TLS model");
3724 }
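// For illustration only (hypothetical IR, not from the original source): the
// TLS model picked by getTLSModel follows the usual rules, e.g.
//
//   @a = thread_local global i32 0                      ; general dynamic
//   @b = thread_local(initialexec) global i32 0         ; initial exec
//   @c = internal thread_local(localexec) global i32 0  ; local exec
//
// Emulated TLS and the Darwin/Windows schemes are dispatched before the ELF
// models are consulted, as in the early returns above.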
3725 
3726 /// Return true if all users of V are within function F, looking through
3727 /// ConstantExprs.
3728 static bool allUsersAreInFunction(const Value *V, const Function *F) {
3729  SmallVector<const User*,4> Worklist(V->users());
3730  while (!Worklist.empty()) {
3731  auto *U = Worklist.pop_back_val();
3732  if (isa<ConstantExpr>(U)) {
3733  append_range(Worklist, U->users());
3734  continue;
3735  }
3736 
3737  auto *I = dyn_cast<Instruction>(U);
3738  if (!I || I->getParent()->getParent() != F)
3739  return false;
3740  }
3741  return true;
3742 }
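// For illustration only (hypothetical IR, not from the original source): uses
// that go through a ConstantExpr are looked through, so for
//
//   @g = internal unnamed_addr constant [2 x i32] [i32 1, i32 2]
//   %v = load i32, ptr getelementptr ([2 x i32], ptr @g, i32 0, i32 1)
//
// the getelementptr constant expression is skipped and only the parent
// function of the enclosing load is checked against F.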
3743 
3745  const GlobalValue *GV, SelectionDAG &DAG,
3746  EVT PtrVT, const SDLoc &dl) {
3747  // If we're creating a pool entry for a constant global with unnamed address,
3748  // and the global is small enough, we can emit it inline into the constant pool
3749  // to save ourselves an indirection.
3750  //
3751  // This is a win if the constant is only used in one function (so it doesn't
3752  // need to be duplicated) or duplicating the constant wouldn't increase code
3753  // size (implying the constant is no larger than 4 bytes).
3754  const Function &F = DAG.getMachineFunction().getFunction();
3755 
3756  // We rely on this decision to inline being idempotent and unrelated to the
3757  // use-site. We know that if we inline a variable at one use site, we'll
3758  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3759  // doesn't know about this optimization, so bail out if it's enabled;
3760  // otherwise we could decide to inline here (and thus never emit the GV)
3761  // while fast-isel-generated code still requires the GV.
3762  if (!EnableConstpoolPromotion ||
3763      DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3764  return SDValue();
3765 
3766  auto *GVar = dyn_cast<GlobalVariable>(GV);
3767  if (!GVar || !GVar->hasInitializer() ||
3768  !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3769  !GVar->hasLocalLinkage())
3770  return SDValue();
3771 
3772  // If we inline a value that contains relocations, we move the relocations
3773  // from .data to .text. This is not allowed in position-independent code.
3774  auto *Init = GVar->getInitializer();
3775  if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3776  Init->needsDynamicRelocation())
3777  return SDValue();
3778 
3779  // The constant islands pass can only really deal with alignment requests
3780  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3781  // any type requiring alignment greater than 4 bytes. We can also only
3782  // promote constants that are multiples of 4 bytes in size, or that can be
3783  // padded to a multiple of 4. Currently we only try to pad constants that
3784  // are strings, for simplicity.
3785  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3786  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3787  Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3788  unsigned RequiredPadding = 4 - (Size % 4);
3789  bool PaddingPossible =
3790  RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3791  if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3792  Size == 0)
3793  return SDValue();
3794 
3795  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3796  MachineFunction &MF = DAG.getMachineFunction();
3797  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3798 
3799  // We can't bloat the constant pool too much, else the ConstantIslands pass
3800  // may fail to converge. If we haven't promoted this global yet (it may have
3801  // multiple uses), and promoting it would increase the constant pool size
3802  // (Size > 4), ensure we have space to do so up to ConstpoolPromotionMaxTotal.
3803  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3804  if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3805  ConstpoolPromotionMaxTotal)
3806  return SDValue();
3807