1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that ARM uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMISelLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMCallingConv.h"
18 #include "ARMConstantPoolValue.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMPerfectShuffle.h"
21 #include "ARMRegisterInfo.h"
22 #include "ARMSelectionDAGInfo.h"
23 #include "ARMSubtarget.h"
24 #include "ARMTargetTransformInfo.h"
27 #include "Utils/ARMBaseInfo.h"
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/BitVector.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallVector.h"
36 #include "llvm/ADT/Statistic.h"
37 #include "llvm/ADT/StringExtras.h"
38 #include "llvm/ADT/StringRef.h"
39 #include "llvm/ADT/StringSwitch.h"
40 #include "llvm/ADT/Triple.h"
41 #include "llvm/ADT/Twine.h"
66 #include "llvm/IR/Attributes.h"
67 #include "llvm/IR/CallingConv.h"
68 #include "llvm/IR/Constant.h"
69 #include "llvm/IR/Constants.h"
70 #include "llvm/IR/DataLayout.h"
71 #include "llvm/IR/DebugLoc.h"
72 #include "llvm/IR/DerivedTypes.h"
73 #include "llvm/IR/Function.h"
74 #include "llvm/IR/GlobalAlias.h"
75 #include "llvm/IR/GlobalValue.h"
76 #include "llvm/IR/GlobalVariable.h"
77 #include "llvm/IR/IRBuilder.h"
78 #include "llvm/IR/InlineAsm.h"
79 #include "llvm/IR/Instruction.h"
80 #include "llvm/IR/Instructions.h"
81 #include "llvm/IR/IntrinsicInst.h"
82 #include "llvm/IR/Intrinsics.h"
83 #include "llvm/IR/IntrinsicsARM.h"
84 #include "llvm/IR/Module.h"
85 #include "llvm/IR/PatternMatch.h"
86 #include "llvm/IR/Type.h"
87 #include "llvm/IR/User.h"
88 #include "llvm/IR/Value.h"
89 #include "llvm/MC/MCInstrDesc.h"
91 #include "llvm/MC/MCRegisterInfo.h"
92 #include "llvm/MC/MCSchedule.h"
95 #include "llvm/Support/Casting.h"
96 #include "llvm/Support/CodeGen.h"
98 #include "llvm/Support/Compiler.h"
99 #include "llvm/Support/Debug.h"
101 #include "llvm/Support/KnownBits.h"
103 #include "llvm/Support/MathExtras.h"
107 #include <algorithm>
108 #include <cassert>
109 #include <cstdint>
110 #include <cstdlib>
111 #include <iterator>
112 #include <limits>
113 #include <optional>
114 #include <string>
115 #include <tuple>
116 #include <utility>
117 #include <vector>
118 
119 using namespace llvm;
120 using namespace llvm::PatternMatch;
121 
122 #define DEBUG_TYPE "arm-isel"
123 
124 STATISTIC(NumTailCalls, "Number of tail calls");
125 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
126 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
127 STATISTIC(NumConstpoolPromoted,
128  "Number of constants with their storage promoted into constant pools");
129 
130 static cl::opt<bool>
131 ARMInterworking("arm-interworking", cl::Hidden,
132  cl::desc("Enable / disable ARM interworking (for debugging only)"),
133  cl::init(true));
134 
135 static cl::opt<bool> EnableConstpoolPromotion(
136  "arm-promote-constant", cl::Hidden,
137  cl::desc("Enable / disable promotion of unnamed_addr constants into "
138  "constant pools"),
139  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
140 static cl::opt<unsigned> ConstpoolPromotionMaxSize(
141  "arm-promote-constant-max-size", cl::Hidden,
142  cl::desc("Maximum size of constant to promote into a constant pool"),
143  cl::init(64));
144 static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
145  "arm-promote-constant-max-total", cl::Hidden,
146  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
147  cl::init(128));
148 
149 cl::opt<unsigned>
150 MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
151  cl::desc("Maximum interleave factor for MVE VLDn to generate."),
152  cl::init(2));
153 
154 // The APCS parameter registers.
155 static const MCPhysReg GPRArgRegs[] = {
156  ARM::R0, ARM::R1, ARM::R2, ARM::R3
157 };
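// Illustrative note (not part of the upstream source): under the APCS/AAPCS
// conventions the first four word-sized arguments are passed in r0-r3 and any
// further arguments go on the stack, which is why only these four registers
// are listed. For a call such as
//   int f(int a, int b, int c, int d, int e);
// a..d are assigned r0..r3 and e is passed in memory.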
158 
159 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
160  if (VT != PromotedLdStVT) {
161  setOperationAction(ISD::LOAD, VT, Promote);
162  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
163 
164  setOperationAction(ISD::STORE, VT, Promote);
165  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
166  }
167 
168  MVT ElemTy = VT.getVectorElementType();
169  if (ElemTy != MVT::f64)
170  setOperationAction(ISD::SETCC, VT, Custom);
171  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
172  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
173  if (ElemTy == MVT::i32) {
174  setOperationAction(ISD::SINT_TO_FP, VT, Custom);
175  setOperationAction(ISD::UINT_TO_FP, VT, Custom);
176  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
177  setOperationAction(ISD::FP_TO_UINT, VT, Custom);
178  } else {
179  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
180  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
181  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
182  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
183  }
184  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
185  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
186  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
187  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
188  setOperationAction(ISD::SELECT, VT, Expand);
189  setOperationAction(ISD::SELECT_CC, VT, Expand);
190  setOperationAction(ISD::VSELECT, VT, Expand);
191  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
192  if (VT.isInteger()) {
193  setOperationAction(ISD::SHL, VT, Custom);
194  setOperationAction(ISD::SRA, VT, Custom);
195  setOperationAction(ISD::SRL, VT, Custom);
196  }
197 
198  // Neon does not support vector divide/remainder operations.
199  setOperationAction(ISD::SDIV, VT, Expand);
200  setOperationAction(ISD::UDIV, VT, Expand);
201  setOperationAction(ISD::FDIV, VT, Expand);
202  setOperationAction(ISD::SREM, VT, Expand);
203  setOperationAction(ISD::UREM, VT, Expand);
204  setOperationAction(ISD::FREM, VT, Expand);
205  setOperationAction(ISD::SDIVREM, VT, Expand);
206  setOperationAction(ISD::UDIVREM, VT, Expand);
207 
208  if (!VT.isFloatingPoint() &&
209  VT != MVT::v2i64 && VT != MVT::v1i64)
210  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
211  setOperationAction(Opcode, VT, Legal);
212  if (!VT.isFloatingPoint())
213  for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
214  setOperationAction(Opcode, VT, Legal);
215 }
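// Illustrative sketch (an interpretation, not upstream code): the NEON setup
// below reaches this helper through addDRTypeForNEON/addQRTypeForNEON, e.g.
//   addDRTypeForNEON(MVT::v8i8);  // calls addTypeForNEON(MVT::v8i8, MVT::f64)
// so v8i8 loads/stores are promoted to f64-sized accesses, shuffles and
// element inserts/extracts get custom lowering, and the divide/remainder
// family is expanded into scalar code or libcalls.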
216 
217 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
218  addRegisterClass(VT, &ARM::DPRRegClass);
219  addTypeForNEON(VT, MVT::f64);
220 }
221 
222 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
223  addRegisterClass(VT, &ARM::DPairRegClass);
224  addTypeForNEON(VT, MVT::v2f64);
225 }
226 
227 void ARMTargetLowering::setAllExpand(MVT VT) {
228  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
229  setOperationAction(Opc, VT, Expand);
230 
231  // We support these really simple operations even on types where all
232  // the actual arithmetic has to be broken down into simpler
233  // operations or turned into library calls.
234  setOperationAction(ISD::BITCAST, VT, Legal);
235  setOperationAction(ISD::LOAD, VT, Legal);
236  setOperationAction(ISD::STORE, VT, Legal);
237  setOperationAction(ISD::UNDEF, VT, Legal);
238 }
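// Illustrative note (not part of the upstream source): after a call such as
//   setAllExpand(MVT::f64);  // done below when the subtarget lacks FP64
// every generic operation on f64 is legalized away (an FADD becomes a libcall
// or is split), while BITCAST/LOAD/STORE/UNDEF stay Legal so f64 values can
// still be moved through registers and memory.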
239 
240 void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
241  LegalizeAction Action) {
242  setLoadExtAction(ISD::EXTLOAD, From, To, Action);
243  setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
244  setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
245 }
246 
247 void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
248  const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
249 
250  for (auto VT : IntTypes) {
251  addRegisterClass(VT, &ARM::MQPRRegClass);
252  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
253  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
254  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
255  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
256  setOperationAction(ISD::SHL, VT, Custom);
257  setOperationAction(ISD::SRA, VT, Custom);
258  setOperationAction(ISD::SRL, VT, Custom);
259  setOperationAction(ISD::SMIN, VT, Legal);
260  setOperationAction(ISD::SMAX, VT, Legal);
261  setOperationAction(ISD::UMIN, VT, Legal);
262  setOperationAction(ISD::UMAX, VT, Legal);
263  setOperationAction(ISD::ABS, VT, Legal);
264  setOperationAction(ISD::SETCC, VT, Custom);
265  setOperationAction(ISD::MLOAD, VT, Custom);
266  setOperationAction(ISD::MSTORE, VT, Legal);
267  setOperationAction(ISD::CTLZ, VT, Legal);
268  setOperationAction(ISD::CTTZ, VT, Custom);
269  setOperationAction(ISD::BITREVERSE, VT, Legal);
270  setOperationAction(ISD::BSWAP, VT, Legal);
271  setOperationAction(ISD::SADDSAT, VT, Legal);
272  setOperationAction(ISD::UADDSAT, VT, Legal);
273  setOperationAction(ISD::SSUBSAT, VT, Legal);
274  setOperationAction(ISD::USUBSAT, VT, Legal);
275  setOperationAction(ISD::ABDS, VT, Legal);
276  setOperationAction(ISD::ABDU, VT, Legal);
277  setOperationAction(ISD::AVGFLOORS, VT, Legal);
278  setOperationAction(ISD::AVGFLOORU, VT, Legal);
279  setOperationAction(ISD::AVGCEILS, VT, Legal);
280  setOperationAction(ISD::AVGCEILU, VT, Legal);
281 
282  // No native support for these.
283  setOperationAction(ISD::UDIV, VT, Expand);
284  setOperationAction(ISD::SDIV, VT, Expand);
285  setOperationAction(ISD::UREM, VT, Expand);
286  setOperationAction(ISD::SREM, VT, Expand);
287  setOperationAction(ISD::UDIVREM, VT, Expand);
288  setOperationAction(ISD::SDIVREM, VT, Expand);
289  setOperationAction(ISD::CTPOP, VT, Expand);
290  setOperationAction(ISD::SELECT, VT, Expand);
291  setOperationAction(ISD::SELECT_CC, VT, Expand);
292 
293  // Vector reductions
294  setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
295  setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
296  setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
297  setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
298  setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
299  setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
300  setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
301  setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
302  setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
303 
304  if (!HasMVEFP) {
305  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
306  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
307  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
308  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
309  } else {
310  setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
311  setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
312  }
313 
314  // Pre and Post inc are supported on loads and stores
315  for (unsigned im = (unsigned)ISD::PRE_INC;
316  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
317  setIndexedLoadAction(im, VT, Legal);
318  setIndexedStoreAction(im, VT, Legal);
319  setIndexedMaskedLoadAction(im, VT, Legal);
320  setIndexedMaskedStoreAction(im, VT, Legal);
321  }
322  }
323 
324  const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
325  for (auto VT : FloatTypes) {
326  addRegisterClass(VT, &ARM::MQPRRegClass);
327  if (!HasMVEFP)
328  setAllExpand(VT);
329 
330  // These are legal or custom whether we have MVE.fp or not
331  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
332  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
333  setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
334  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
335  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
336  setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
337  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
338  setOperationAction(ISD::SETCC, VT, Custom);
339  setOperationAction(ISD::MLOAD, VT, Custom);
340  setOperationAction(ISD::MSTORE, VT, Legal);
341  setOperationAction(ISD::SELECT, VT, Expand);
342  setOperationAction(ISD::SELECT_CC, VT, Expand);
343 
344  // Pre and Post inc are supported on loads and stores
345  for (unsigned im = (unsigned)ISD::PRE_INC;
346  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
347  setIndexedLoadAction(im, VT, Legal);
348  setIndexedStoreAction(im, VT, Legal);
349  setIndexedMaskedLoadAction(im, VT, Legal);
350  setIndexedMaskedStoreAction(im, VT, Legal);
351  }
352 
353  if (HasMVEFP) {
354  setOperationAction(ISD::FMINNUM, VT, Legal);
355  setOperationAction(ISD::FMAXNUM, VT, Legal);
356  setOperationAction(ISD::FROUND, VT, Legal);
357  setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
358  setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
359  setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
360  setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
361 
362  // No native support for these.
363  setOperationAction(ISD::FDIV, VT, Expand);
364  setOperationAction(ISD::FREM, VT, Expand);
365  setOperationAction(ISD::FSQRT, VT, Expand);
366  setOperationAction(ISD::FSIN, VT, Expand);
367  setOperationAction(ISD::FCOS, VT, Expand);
368  setOperationAction(ISD::FPOW, VT, Expand);
369  setOperationAction(ISD::FLOG, VT, Expand);
370  setOperationAction(ISD::FLOG2, VT, Expand);
371  setOperationAction(ISD::FLOG10, VT, Expand);
372  setOperationAction(ISD::FEXP, VT, Expand);
373  setOperationAction(ISD::FEXP2, VT, Expand);
374  setOperationAction(ISD::FNEARBYINT, VT, Expand);
375  }
376  }
377 
378  // Custom-expand smaller-than-legal vector reductions to prevent false zero
379  // items from being added.
380  setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);
381  setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);
382  setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);
383  setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);
384  setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
385  setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);
386  setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
387  setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);
388 
389  // We 'support' these types up to the bitcast/load/store level, regardless of
390  // MVE integer-only / float support. Only FP data processing on the FP vector
391  // types is inhibited at the integer-only level.
392  const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
393  for (auto VT : LongTypes) {
394  addRegisterClass(VT, &ARM::MQPRRegClass);
395  setAllExpand(VT);
396  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
397  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
398  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
399  setOperationAction(ISD::VSELECT, VT, Legal);
400  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
401  }
402  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
403 
404  // We can do bitwise operations on v2i64 vectors
405  setOperationAction(ISD::AND, MVT::v2i64, Legal);
406  setOperationAction(ISD::OR, MVT::v2i64, Legal);
407  setOperationAction(ISD::XOR, MVT::v2i64, Legal);
408 
409  // It is legal to extload from v4i8 or v4i16 to v4i32, and from v8i8 to v8i16.
410  addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
411  addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
412  addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
413 
414  // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
415  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
416  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
417  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
418  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
419  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
420 
421  // Some truncating stores are legal too.
422  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
423  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
424  setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
425 
426  // Pre and Post inc on these are legal, given the correct extends
427  for (unsigned im = (unsigned)ISD::PRE_INC;
428  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
429  for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
430  setIndexedLoadAction(im, VT, Legal);
431  setIndexedStoreAction(im, VT, Legal);
432  setIndexedMaskedLoadAction(im, VT, Legal);
433  setIndexedMaskedStoreAction(im, VT, Legal);
434  }
435  }
436 
437  // Predicate types
438  const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
439  for (auto VT : pTypes) {
440  addRegisterClass(VT, &ARM::VCCRRegClass);
441  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
442  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
443  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
444  setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
445  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
446  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
447  setOperationAction(ISD::SETCC, VT, Custom);
448  setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
449  setOperationAction(ISD::LOAD, VT, Custom);
450  setOperationAction(ISD::STORE, VT, Custom);
451  setOperationAction(ISD::TRUNCATE, VT, Custom);
452  setOperationAction(ISD::VSELECT, VT, Expand);
453  setOperationAction(ISD::SELECT, VT, Expand);
454  setOperationAction(ISD::SELECT_CC, VT, Expand);
455 
456  if (!HasMVEFP) {
457  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
458  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
459  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
460  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
461  }
462  }
463  setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
464  setOperationAction(ISD::TRUNCATE, MVT::v2i1, Expand);
465  setOperationAction(ISD::AND, MVT::v2i1, Expand);
466  setOperationAction(ISD::OR, MVT::v2i1, Expand);
467  setOperationAction(ISD::XOR, MVT::v2i1, Expand);
468  setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Expand);
469  setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Expand);
470  setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Expand);
471  setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Expand);
472 
473  setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
474  setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
475  setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
476  setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
477  setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
478  setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
479  setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
480  setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
481 }
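// Illustrative note (not part of the upstream source): the constructor below
// calls addMVEVectorTypes(Subtarget->hasMVEFloatOps()), so the integer MVE
// vector types are always registered once MVE integer ops are present, and
// HasMVEFP only decides whether the float vector types receive real FP actions
// (FMINNUM, FROUND, the FP reductions, ...) or are largely expanded, and
// whether the int<->fp conversions stay available.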
482 
483 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
484  const ARMSubtarget &STI)
485  : TargetLowering(TM), Subtarget(&STI) {
486  RegInfo = Subtarget->getRegisterInfo();
487  Itins = Subtarget->getInstrItineraryData();
488 
491 
492  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
493  !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
494  bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
495  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
496  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
497  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
498  : CallingConv::ARM_AAPCS);
499  }
500 
501  if (Subtarget->isTargetMachO()) {
502  // Uses VFP for Thumb libfuncs if available.
503  if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
504  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
505  static const struct {
506  const RTLIB::Libcall Op;
507  const char * const Name;
508  const ISD::CondCode Cond;
509  } LibraryCalls[] = {
510  // Single-precision floating-point arithmetic.
511  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
512  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
513  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
514  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
515 
516  // Double-precision floating-point arithmetic.
517  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
518  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
519  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
520  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
521 
522  // Single-precision comparisons.
523  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
524  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
525  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
526  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
527  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
528  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
529  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
530 
531  // Double-precision comparisons.
532  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
533  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
534  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
535  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
536  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
537  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
538  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
539 
540  // Floating-point to integer conversions.
541  // i64 conversions are done via library routines even when generating VFP
542  // instructions, so use the same ones.
543  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
544  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
545  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
546  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
547 
548  // Conversions between floating types.
549  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
550  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
551 
552  // Integer to floating-point conversions.
553  // i64 conversions are done via library routines even when generating VFP
554  // instructions, so use the same ones.
555  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
556  // e.g., __floatunsidf vs. __floatunssidfvfp.
557  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
558  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
559  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
560  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
561  };
562 
563  for (const auto &LC : LibraryCalls) {
564  setLibcallName(LC.Op, LC.Name);
565  if (LC.Cond != ISD::SETCC_INVALID)
566  setCmpLibcallCC(LC.Op, LC.Cond);
567  }
568  }
569  }
570 
571  // These libcalls are not available in 32-bit.
572  setLibcallName(RTLIB::SHL_I128, nullptr);
573  setLibcallName(RTLIB::SRL_I128, nullptr);
574  setLibcallName(RTLIB::SRA_I128, nullptr);
575  setLibcallName(RTLIB::MUL_I128, nullptr);
576  setLibcallName(RTLIB::MULO_I64, nullptr);
577  setLibcallName(RTLIB::MULO_I128, nullptr);
578 
579  // RTLIB
580  if (Subtarget->isAAPCS_ABI() &&
581  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
582  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
583  static const struct {
584  const RTLIB::Libcall Op;
585  const char * const Name;
586  const CallingConv::ID CC;
587  const ISD::CondCode Cond;
588  } LibraryCalls[] = {
589  // Double-precision floating-point arithmetic helper functions
590  // RTABI chapter 4.1.2, Table 2
591  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
592  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
593  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
594  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
595 
596  // Double-precision floating-point comparison helper functions
597  // RTABI chapter 4.1.2, Table 3
598  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
599  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
600  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
601  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
602  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
603  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
604  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
605 
606  // Single-precision floating-point arithmetic helper functions
607  // RTABI chapter 4.1.2, Table 4
608  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
609  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
610  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
611  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
612 
613  // Single-precision floating-point comparison helper functions
614  // RTABI chapter 4.1.2, Table 5
615  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
616  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
617  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
618  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
619  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
620  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
621  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
622 
623  // Floating-point to integer conversions.
624  // RTABI chapter 4.1.2, Table 6
625  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
626  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
627  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
628  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
629  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
630  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
631  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633 
634  // Conversions between floating types.
635  // RTABI chapter 4.1.2, Table 7
636  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
639 
640  // Integer to floating-point conversions.
641  // RTABI chapter 4.1.2, Table 8
642  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
643  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
644  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
645  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
646  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
647  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
648  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
649  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
650 
651  // Long long helper functions
652  // RTABI chapter 4.2, Table 9
653  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
654  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
655  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
656  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
657 
658  // Integer division functions
659  // RTABI chapter 4.3.1
660  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
661  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
662  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
663  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
664  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
665  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
666  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
667  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
668  };
669 
670  for (const auto &LC : LibraryCalls) {
671  setLibcallName(LC.Op, LC.Name);
672  setLibcallCallingConv(LC.Op, LC.CC);
673  if (LC.Cond != ISD::SETCC_INVALID)
674  setCmpLibcallCC(LC.Op, LC.Cond);
675  }
676 
677  // EABI dependent RTLIB
678  if (TM.Options.EABIVersion == EABI::EABI4 ||
679  TM.Options.EABIVersion == EABI::EABI5) {
680  static const struct {
681  const RTLIB::Libcall Op;
682  const char *const Name;
683  const CallingConv::ID CC;
684  const ISD::CondCode Cond;
685  } MemOpsLibraryCalls[] = {
686  // Memory operations
687  // RTABI chapter 4.3.4
689  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
690  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
691  };
692 
693  for (const auto &LC : MemOpsLibraryCalls) {
694  setLibcallName(LC.Op, LC.Name);
695  setLibcallCallingConv(LC.Op, LC.CC);
696  if (LC.Cond != ISD::SETCC_INVALID)
697  setCmpLibcallCC(LC.Op, LC.Cond);
698  }
699  }
700  }
701 
702  if (Subtarget->isTargetWindows()) {
703  static const struct {
704  const RTLIB::Libcall Op;
705  const char * const Name;
706  const CallingConv::ID CC;
707  } LibraryCalls[] = {
708  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
709  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
710  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
711  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
712  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
713  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
714  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
715  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
716  };
717 
718  for (const auto &LC : LibraryCalls) {
719  setLibcallName(LC.Op, LC.Name);
720  setLibcallCallingConv(LC.Op, LC.CC);
721  }
722  }
723 
724  // Use divmod compiler-rt calls for iOS 5.0 and later.
725  if (Subtarget->isTargetMachO() &&
726  !(Subtarget->isTargetIOS() &&
727  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
728  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
729  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
730  }
731 
732  // The half <-> float conversion functions are always soft-float on
733  // non-watchos platforms, but are needed for some targets which use a
734  // hard-float calling convention by default.
735  if (!Subtarget->isTargetWatchABI()) {
736  if (Subtarget->isAAPCS_ABI()) {
737  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
738  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
739  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
740  } else {
741  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
742  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
743  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
744  }
745  }
746 
747  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
748  // a __gnu_ prefix (which is the default).
749  if (Subtarget->isTargetAEABI()) {
750  static const struct {
751  const RTLIB::Libcall Op;
752  const char * const Name;
753  const CallingConv::ID CC;
754  } LibraryCalls[] = {
755  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
756  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
757  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
758  };
759 
760  for (const auto &LC : LibraryCalls) {
761  setLibcallName(LC.Op, LC.Name);
762  setLibcallCallingConv(LC.Op, LC.CC);
763  }
764  }
765 
766  if (Subtarget->isThumb1Only())
767  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
768  else
769  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
770 
771  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
772  Subtarget->hasFPRegs()) {
773  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
774  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
775 
780 
781  if (!Subtarget->hasVFP2Base())
782  setAllExpand(MVT::f32);
783  if (!Subtarget->hasFP64())
784  setAllExpand(MVT::f64);
785  }
786 
787  if (Subtarget->hasFullFP16()) {
788  addRegisterClass(MVT::f16, &ARM::HPRRegClass);
791 
794  }
795 
796  if (Subtarget->hasBF16()) {
797  addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
798  setAllExpand(MVT::bf16);
799  if (!Subtarget->hasFullFP16())
801  }
802 
803  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
804  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
805  setTruncStoreAction(VT, InnerVT, Expand);
806  addAllExtLoads(VT, InnerVT, Expand);
807  }
808 
811 
813  }
814 
817 
820 
821  if (Subtarget->hasMVEIntegerOps())
822  addMVEVectorTypes(Subtarget->hasMVEFloatOps());
823 
824  // Combine low-overhead loop intrinsics so that we can lower i1 types.
825  if (Subtarget->hasLOB()) {
827  }
828 
829  if (Subtarget->hasNEON()) {
830  addDRTypeForNEON(MVT::v2f32);
831  addDRTypeForNEON(MVT::v8i8);
832  addDRTypeForNEON(MVT::v4i16);
833  addDRTypeForNEON(MVT::v2i32);
834  addDRTypeForNEON(MVT::v1i64);
835 
836  addQRTypeForNEON(MVT::v4f32);
837  addQRTypeForNEON(MVT::v2f64);
838  addQRTypeForNEON(MVT::v16i8);
839  addQRTypeForNEON(MVT::v8i16);
840  addQRTypeForNEON(MVT::v4i32);
841  addQRTypeForNEON(MVT::v2i64);
842 
843  if (Subtarget->hasFullFP16()) {
844  addQRTypeForNEON(MVT::v8f16);
845  addDRTypeForNEON(MVT::v4f16);
846  }
847 
848  if (Subtarget->hasBF16()) {
849  addQRTypeForNEON(MVT::v8bf16);
850  addDRTypeForNEON(MVT::v4bf16);
851  }
852  }
853 
854  if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
855  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
856  // none of Neon, MVE or VFP supports any arithmetic operations on it.
860  // FIXME: Code duplication: FDIV and FREM are expanded always, see
861  // ARMTargetLowering::addTypeForNEON method for details.
864  // FIXME: Create unittest.
865  // In other words, find a way to handle the case when "copysign" appears in
866  // the DAG with vector operands.
868  // FIXME: Code duplication: SETCC has custom operation action, see
869  // ARMTargetLowering::addTypeForNEON method for details.
871  // FIXME: Create unittest for FNEG and for FABS.
883  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
890  }
891 
892  if (Subtarget->hasNEON()) {
893  // The same applies to v4f32. But keep in mind that vadd, vsub, and vmul are
894  // natively supported for v4f32.
909 
910  // Mark v2f32 intrinsics.
925 
926  // Neon does not support some operations on v1i64 and v2i64 types.
928  // Custom handling for some quad-vector types to detect VMULL.
932  // Custom handling for some vector types to avoid expensive expansions
937  // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
938  // a destination type that is wider than the source, nor does
939  // it have a FP_TO_[SU]INT instruction with a narrower destination than
940  // source.
949 
952 
953  // NEON does not have single instruction CTPOP for vectors with element
954  // types wider than 8-bits. However, custom lowering can leverage the
955  // v8i8/v16i8 vcnt instruction.
962 
965 
966  // NEON does not have single instruction CTTZ for vectors.
971 
976 
981 
986 
987  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
990  }
991 
992  // NEON only has FMA instructions as of VFP4.
993  if (!Subtarget->hasVFP4Base()) {
996  }
997 
1000 
1001  // It is legal to extload from v4i8 to v4i16 or v4i32.
1003  MVT::v2i32}) {
1008  }
1009  }
1010  }
1011 
1012  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1019  }
1020  if (Subtarget->hasMVEIntegerOps()) {
1023  ISD::SETCC});
1024  }
1025  if (Subtarget->hasMVEFloatOps()) {
1027  }
1028 
1029  if (!Subtarget->hasFP64()) {
1030  // When targeting a floating-point unit with only single-precision
1031  // operations, f64 is legal for the few double-precision instructions which
1032  // are present. However, no double-precision operations other than moves,
1033  // loads and stores are provided by the hardware.
1070  }
1071 
1072  if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1075  if (Subtarget->hasFullFP16()) {
1078  }
1079  }
1080 
1081  if (!Subtarget->hasFP16()) {
1084  }
1085 
1087 
1088  // ARM does not have floating-point extending loads.
1089  for (MVT VT : MVT::fp_valuetypes()) {
1092  }
1093 
1094  // ... or truncating stores
1098 
1099  // ARM does not have i1 sign extending load.
1100  for (MVT VT : MVT::integer_valuetypes())
1102 
1103  // ARM supports all 4 flavors of integer indexed load / store.
1104  if (!Subtarget->isThumb1Only()) {
1105  for (unsigned im = (unsigned)ISD::PRE_INC;
1106  im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1115  }
1116  } else {
1117  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1120  }
1121 
1126 
1129  if (Subtarget->hasDSP()) {
1138  }
1139  if (Subtarget->hasBaseDSP()) {
1142  }
1143 
1144  // i64 operation support.
1147  if (Subtarget->isThumb1Only()) {
1150  }
1151  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1152  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1154 
1164 
1165  // MVE lowers 64-bit shifts to lsll and lsrl,
1166  // assuming that ISD::SRL and SRA of i64 are already marked Custom.
1167  if (Subtarget->hasMVEIntegerOps())
1169 
1170  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1171  if (Subtarget->isThumb1Only()) {
1175  }
1176 
1177  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1179 
1180  // ARM does not have ROTL.
1182  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1185  }
1188  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1191  }
1192 
1193  // @llvm.readcyclecounter requires the Performance Monitors extension.
1194  // Default to the 0 expansion on unsupported platforms.
1195  // FIXME: Technically there are older ARM CPUs that have
1196  // implementation-specific ways of obtaining this information.
1197  if (Subtarget->hasPerfMon())
1199 
1200  // Only ARMv6 has BSWAP.
1201  if (!Subtarget->hasV6Ops())
1203 
1204  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1205  : Subtarget->hasDivideInARMMode();
1206  if (!hasDivide) {
1207  // These are expanded into libcalls if the cpu doesn't have HW divider.
1210  }
1211 
1212  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1215 
1218  }
1219 
1222 
1223  // Register based DivRem for AEABI (RTABI 4.2)
1224  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1225  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1226  Subtarget->isTargetWindows()) {
1229  HasStandaloneRem = false;
1230 
1231  if (Subtarget->isTargetWindows()) {
1232  const struct {
1233  const RTLIB::Libcall Op;
1234  const char * const Name;
1235  const CallingConv::ID CC;
1236  } LibraryCalls[] = {
1237  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1238  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1239  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1240  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1241 
1242  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1243  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1244  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1245  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1246  };
1247 
1248  for (const auto &LC : LibraryCalls) {
1249  setLibcallName(LC.Op, LC.Name);
1250  setLibcallCallingConv(LC.Op, LC.CC);
1251  }
1252  } else {
1253  const struct {
1254  const RTLIB::Libcall Op;
1255  const char * const Name;
1256  const CallingConv::ID CC;
1257  } LibraryCalls[] = {
1258  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1259  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1260  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1261  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1262 
1263  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1264  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1265  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1266  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1267  };
1268 
1269  for (const auto &LC : LibraryCalls) {
1270  setLibcallName(LC.Op, LC.Name);
1271  setLibcallCallingConv(LC.Op, LC.CC);
1272  }
1273  }
1274 
1279  } else {
1282  }
1283 
1284  if (Subtarget->getTargetTriple().isOSMSVCRT()) {
1285  // MSVCRT doesn't have powi; fall back to pow
1286  setLibcallName(RTLIB::POWI_F32, nullptr);
1287  setLibcallName(RTLIB::POWI_F64, nullptr);
1288  }
1289 
1294 
1297 
1298  // Use the default implementation.
1305 
1306  if (Subtarget->isTargetWindows())
1308  else
1310 
1311  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1312  // the default expansion.
1313  InsertFencesForAtomic = false;
1314  if (Subtarget->hasAnyDataBarrier() &&
1315  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1316  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1317  // to ldrex/strex loops already.
1319  if (!Subtarget->isThumb() || !Subtarget->isMClass())
1321 
1322  // On v8, we have particularly efficient implementations of atomic fences
1323  // if they can be combined with nearby atomic loads and stores.
1324  if (!Subtarget->hasAcquireRelease() ||
1325  getTargetMachine().getOptLevel() == 0) {
1326  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1327  InsertFencesForAtomic = true;
1328  }
1329  } else {
1330  // If there's anything we can use as a barrier, go through custom lowering
1331  // for ATOMIC_FENCE.
1332  // If target has DMB in thumb, Fences can be inserted.
1333  if (Subtarget->hasDataBarrier())
1334  InsertFencesForAtomic = true;
1335 
1337  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1338 
1339  // Set them all for expansion, which will force libcalls.
1352  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1353  // Unordered/Monotonic case.
1354  if (!InsertFencesForAtomic) {
1357  }
1358  }
1359 
1360  // Compute supported atomic widths.
1361  if (Subtarget->isTargetLinux() ||
1362  (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1363  // For targets where __sync_* routines are reliably available, we use them
1364  // if necessary.
1365  //
1366  // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1367  // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1368  //
1369  // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
1370  // such targets should provide __sync_* routines, which use the ARM mode
1371  // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
1372  // encoding; see ARMISD::MEMBARRIER_MCR.)
1374  } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1375  Subtarget->hasForced32BitAtomics()) {
1376  // Cortex-M (besides Cortex-M0) have 32-bit atomics.
1378  } else {
1379  // We can't assume anything about other targets; just use libatomic
1380  // routines.
1382  }
1383 
1385 
1387 
1388  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1389  if (!Subtarget->hasV6Ops()) {
1392  }
1394 
1395  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1396  !Subtarget->isThumb1Only()) {
1397  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1398  // iff target supports vfp2.
1402  }
1403 
1404  // We want to custom lower some of our intrinsics.
1409  if (Subtarget->useSjLjEH())
1410  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1411 
1421  if (Subtarget->hasFullFP16()) {
1425  }
1426 
1428 
1431  if (Subtarget->hasFullFP16())
1436 
1437  // We don't support sin/cos/fmod/copysign/pow
1446  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1447  !Subtarget->isThumb1Only()) {
1450  }
1453 
1454  if (!Subtarget->hasVFP4Base()) {
1457  }
1458 
1459  // Various VFP goodness
1460  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1461  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1462  if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1465  }
1466 
1467  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1468  if (!Subtarget->hasFP16()) {
1471  }
1472 
1473  // Strict floating-point comparisons need custom lowering.
1480  }
1481 
1482  // Use __sincos_stret if available.
1483  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1484  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1487  }
1488 
1489  // FP-ARMv8 implements a lot of rounding-like FP operations.
1490  if (Subtarget->hasFPARMv8Base()) {
1499  if (Subtarget->hasNEON()) {
1504  }
1505 
1506  if (Subtarget->hasFP64()) {
1515  }
1516  }
1517 
1518  // FP16 values often need to be promoted to call lib functions.
1519  if (Subtarget->hasFullFP16()) {
1532 
1534  }
1535 
1536  if (Subtarget->hasNEON()) {
1537  // vmin and vmax aren't available in a scalar form, so we can use
1538  // a NEON instruction with an undef lane instead. This has a performance
1539  // penalty on some cores, so we don't do this unless we have been
1540  // asked to by the core tuning model.
1541  if (Subtarget->useNEONForSinglePrecisionFP()) {
1546  }
1551 
1552  if (Subtarget->hasFullFP16()) {
1557 
1562  }
1563  }
1564 
1565  // We have target-specific dag combine patterns for the following nodes:
1566  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1569 
1570  if (Subtarget->hasMVEIntegerOps())
1572 
1573  if (Subtarget->hasV6Ops())
1575  if (Subtarget->isThumb1Only())
1577  // Attempt to lower smin/smax to ssat/usat
1578  if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1579  Subtarget->isThumb2()) {
1581  }
1582 
1584 
1585  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1586  !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1588  else
1590 
1591  //// temporary - rewrite interface to use type
1592  MaxStoresPerMemset = 8;
1594  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1596  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1598 
1599  // On ARM, arguments smaller than 4 bytes are extended, so all arguments
1600  // are at least 4-byte aligned.
1602 
1603  // Prefer likely predicted branches to selects on out-of-order cores.
1604  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1605 
1606  setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1607 
1608  setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1609 
1610  if (Subtarget->isThumb() || Subtarget->isThumb2())
1612 }
1613 
1614 bool ARMTargetLowering::useSoftFloat() const {
1615  return Subtarget->useSoftFloat();
1616 }
1617 
1618 // FIXME: It might make sense to define the representative register class as the
1619 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1620 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1621 // SPR's representative would be DPR_VFP2. This should work well if register
1622 // pressure tracking were modified such that a register use would increment the
1623 // pressure of the register class's representative and all of its super
1624 // classes' representatives transitively. We have not implemented this because
1625 // of the difficulty prior to coalescing of modeling operand register classes
1626 // due to the common occurrence of cross-class copies and subregister insertions
1627 // and extractions.
1628 std::pair<const TargetRegisterClass *, uint8_t>
1629 ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1630  MVT VT) const {
1631  const TargetRegisterClass *RRC = nullptr;
1632  uint8_t Cost = 1;
1633  switch (VT.SimpleTy) {
1634  default:
1635  return TargetLowering::findRepresentativeClass(TRI, VT);
1636  // Use DPR as the representative register class for all floating-point
1637  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1638  // the cost is 1 for both f32 and f64.
1639  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1640  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1641  RRC = &ARM::DPRRegClass;
1642  // When NEON is used for SP, only half of the register file is available
1643  // because operations that define both SP and DP results will be constrained
1644  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1645  // coalescing by double-counting the SP regs. See the FIXME above.
1646  if (Subtarget->useNEONForSinglePrecisionFP())
1647  Cost = 2;
1648  break;
1649  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1650  case MVT::v4f32: case MVT::v2f64:
1651  RRC = &ARM::DPRRegClass;
1652  Cost = 2;
1653  break;
1654  case MVT::v4i64:
1655  RRC = &ARM::DPRRegClass;
1656  Cost = 4;
1657  break;
1658  case MVT::v8i64:
1659  RRC = &ARM::DPRRegClass;
1660  Cost = 8;
1661  break;
1662  }
1663  return std::make_pair(RRC, Cost);
1664 }
1665 
1666 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1667 #define MAKE_CASE(V) \
1668  case V: \
1669  return #V;
1670  switch ((ARMISD::NodeType)Opcode) {
1671  case ARMISD::FIRST_NUMBER:
1672  break;
1876 #undef MAKE_CASE
1877  }
1878  return nullptr;
1879 }
1880 
1881 EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
1882  EVT VT) const {
1883  if (!VT.isVector())
1884  return getPointerTy(DL);
1885 
1886  // MVE has a predicate register.
1887  if ((Subtarget->hasMVEIntegerOps() &&
1888  (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
1889  VT == MVT::v16i8)) ||
1890  (Subtarget->hasMVEFloatOps() &&
1891  (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
1892  return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1893  return VT.changeVectorElementTypeToInteger();
1894 }
1895 
1896 /// getRegClassFor - Return the register class that should be used for the
1897 /// specified value type.
1898 const TargetRegisterClass *
1899 ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1900  (void)isDivergent;
1901  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1902  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1903  // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1904  // MVE Q registers.
1905  if (Subtarget->hasNEON()) {
1906  if (VT == MVT::v4i64)
1907  return &ARM::QQPRRegClass;
1908  if (VT == MVT::v8i64)
1909  return &ARM::QQQQPRRegClass;
1910  }
1911  if (Subtarget->hasMVEIntegerOps()) {
1912  if (VT == MVT::v4i64)
1913  return &ARM::MQQPRRegClass;
1914  if (VT == MVT::v8i64)
1915  return &ARM::MQQQQPRRegClass;
1916  }
1917  return TargetLowering::getRegClassFor(VT);
1918 }
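// Illustrative note (not part of the upstream source): a v4i64 value created
// as a REG_SEQUENCE for a multi-register NEON load/store therefore lives in a
// QQPR register (four consecutive D registers), even though v4i64 itself is
// never made a legal type for ordinary arithmetic.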
1919 
1920 // memcpy and other memory intrinsics typically try to use LDM/STM if the
1921 // source/dest is aligned and the copy size is large enough. We therefore want
1922 // to align such objects passed to memory intrinsics.
1923 bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1924  Align &PrefAlign) const {
1925  if (!isa<MemIntrinsic>(CI))
1926  return false;
1927  MinSize = 8;
1928  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1929  // cycle faster than 4-byte aligned LDM.
1930  PrefAlign =
1931  (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
1932  return true;
1933 }
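// Illustrative note (not part of the upstream source): with the values chosen
// above, an object passed to llvm.memcpy/llvm.memset that is at least MinSize
// (8) bytes may have its alignment raised to 8 on v6+ A/R-class cores (4
// elsewhere), so the expansion can use 8-byte aligned LDM/STM sequences.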
1934 
1935 // Create a fast isel object.
1936 FastISel *
1937 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1938  const TargetLibraryInfo *libInfo) const {
1939  return ARM::createFastISel(funcInfo, libInfo);
1940 }
1941 
1942 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1943  unsigned NumVals = N->getNumValues();
1944  if (!NumVals)
1945  return Sched::RegPressure;
1946 
1947  for (unsigned i = 0; i != NumVals; ++i) {
1948  EVT VT = N->getValueType(i);
1949  if (VT == MVT::Glue || VT == MVT::Other)
1950  continue;
1951  if (VT.isFloatingPoint() || VT.isVector())
1952  return Sched::ILP;
1953  }
1954 
1955  if (!N->isMachineOpcode())
1956  return Sched::RegPressure;
1957 
1958  // Loads are scheduled for latency even if the instruction itinerary
1959  // is not available.
1960  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1961  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1962 
1963  if (MCID.getNumDefs() == 0)
1964  return Sched::RegPressure;
1965  if (!Itins->isEmpty() &&
1966  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1967  return Sched::ILP;
1968 
1969  return Sched::RegPressure;
1970 }
1971 
1972 //===----------------------------------------------------------------------===//
1973 // Lowering Code
1974 //===----------------------------------------------------------------------===//
1975 
1976 static bool isSRL16(const SDValue &Op) {
1977  if (Op.getOpcode() != ISD::SRL)
1978  return false;
1979  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1980  return Const->getZExtValue() == 16;
1981  return false;
1982 }
1983 
1984 static bool isSRA16(const SDValue &Op) {
1985  if (Op.getOpcode() != ISD::SRA)
1986  return false;
1987  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1988  return Const->getZExtValue() == 16;
1989  return false;
1990 }
1991 
1992 static bool isSHL16(const SDValue &Op) {
1993  if (Op.getOpcode() != ISD::SHL)
1994  return false;
1995  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1996  return Const->getZExtValue() == 16;
1997  return false;
1998 }
1999 
2000 // Check for a signed 16-bit value. We special-case SRA because it makes it
2001 // simpler when also looking for SRAs that aren't sign-extending a
2002 // smaller value. Without the check, we'd need to take extra care with
2003 // checking order for some operations.
2004 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
2005  if (isSRA16(Op))
2006  return isSHL16(Op.getOperand(0));
2007  return DAG.ComputeNumSignBits(Op) == 17;
2008 }
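// Illustrative note (not part of the upstream source): ComputeNumSignBits(Op)
// == 17 on an i32 value means bits [31:15] are all copies of the sign bit,
// i.e. the value behaves as a signed 16-bit quantity. The SRA16(SHL16 x) form
// is matched separately so callers can tell an explicit sign-extension of a
// halfword apart from a value that merely happens to have many sign bits.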
2009 
2010 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
2011 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
2012  switch (CC) {
2013  default: llvm_unreachable("Unknown condition code!");
2014  case ISD::SETNE: return ARMCC::NE;
2015  case ISD::SETEQ: return ARMCC::EQ;
2016  case ISD::SETGT: return ARMCC::GT;
2017  case ISD::SETGE: return ARMCC::GE;
2018  case ISD::SETLT: return ARMCC::LT;
2019  case ISD::SETLE: return ARMCC::LE;
2020  case ISD::SETUGT: return ARMCC::HI;
2021  case ISD::SETUGE: return ARMCC::HS;
2022  case ISD::SETULT: return ARMCC::LO;
2023  case ISD::SETULE: return ARMCC::LS;
2024  }
2025 }
2026 
2027 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
2028 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
2029  ARMCC::CondCodes &CondCode2) {
2030  CondCode2 = ARMCC::AL;
2031  switch (CC) {
2032  default: llvm_unreachable("Unknown FP condition!");
2033  case ISD::SETEQ:
2034  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
2035  case ISD::SETGT:
2036  case ISD::SETOGT: CondCode = ARMCC::GT; break;
2037  case ISD::SETGE:
2038  case ISD::SETOGE: CondCode = ARMCC::GE; break;
2039  case ISD::SETOLT: CondCode = ARMCC::MI; break;
2040  case ISD::SETOLE: CondCode = ARMCC::LS; break;
2041  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
2042  case ISD::SETO: CondCode = ARMCC::VC; break;
2043  case ISD::SETUO: CondCode = ARMCC::VS; break;
2044  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
2045  case ISD::SETUGT: CondCode = ARMCC::HI; break;
2046  case ISD::SETUGE: CondCode = ARMCC::PL; break;
2047  case ISD::SETLT:
2048  case ISD::SETULT: CondCode = ARMCC::LT; break;
2049  case ISD::SETLE:
2050  case ISD::SETULE: CondCode = ARMCC::LE; break;
2051  case ISD::SETNE:
2052  case ISD::SETUNE: CondCode = ARMCC::NE; break;
2053  }
2054 }
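// Illustrative note (not part of the upstream source): when CondCode2 is left
// as something other than ARMCC::AL the FP comparison needs two predicated
// checks. For example SETUEQ ("unordered or equal") maps to EQ with a second
// check on VS, i.e. the result is true if the operands compared equal or the
// compare signalled an unordered result (V set).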
2055 
2056 //===----------------------------------------------------------------------===//
2057 // Calling Convention Implementation
2058 //===----------------------------------------------------------------------===//
2059 
2060 /// getEffectiveCallingConv - Get the effective calling convention, taking into
2061 /// account presence of floating point hardware and calling convention
2062 /// limitations, such as support for variadic functions.
2063 CallingConv::ID
2064 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
2065  bool isVarArg) const {
2066  switch (CC) {
2067  default:
2068  report_fatal_error("Unsupported calling convention");
2070  case CallingConv::ARM_APCS:
2071  case CallingConv::GHC:
2073  return CC;
2077  case CallingConv::Swift:
2080  case CallingConv::C:
2081  case CallingConv::Tail:
2082  if (!Subtarget->isAAPCS_ABI())
2083  return CallingConv::ARM_APCS;
2084  else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
2085  getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2086  !isVarArg)
2087  return CallingConv::ARM_AAPCS_VFP;
2088  else
2089  return CallingConv::ARM_AAPCS;
2090  case CallingConv::Fast:
2092  if (!Subtarget->isAAPCS_ABI()) {
2093  if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2094  return CallingConv::Fast;
2095  return CallingConv::ARM_APCS;
2096  } else if (Subtarget->hasVFP2Base() &&
2097  !Subtarget->isThumb1Only() && !isVarArg)
2098  return CallingConv::ARM_AAPCS_VFP;
2099  else
2100  return CallingConv::ARM_AAPCS;
2101  }
2102 }
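
The branch that matters most in practice is the one for CallingConv::C and CallingConv::Tail: on an AAPCS target the effective convention becomes the VFP (hard-float register) variant only when FP hardware is present, the core is not Thumb1-only, the float ABI is hard, and the call is not variadic. A plain-boolean sketch of that decision, using stand-in names rather than the real subtarget queries:

#include <cstdio>

enum class EffectiveCC { ARM_APCS, ARM_AAPCS, ARM_AAPCS_VFP };

// Stand-in for the CallingConv::C / CallingConv::Tail branch above; the
// parameters model the subtarget and option queries used there.
static EffectiveCC effectiveCCForC(bool IsAAPCS, bool HasVFP2, bool IsThumb1Only,
                                   bool HardFloatABI, bool IsVarArg) {
  if (!IsAAPCS)
    return EffectiveCC::ARM_APCS;
  if (HasVFP2 && !IsThumb1Only && HardFloatABI && !IsVarArg)
    return EffectiveCC::ARM_AAPCS_VFP;
  return EffectiveCC::ARM_AAPCS;
}

int main() {
  // A printf-style variadic call on a hard-float AAPCS target still uses the
  // base AAPCS convention, because of the !isVarArg requirement.
  EffectiveCC CC = effectiveCCForC(/*IsAAPCS=*/true, /*HasVFP2=*/true,
                                   /*IsThumb1Only=*/false, /*HardFloatABI=*/true,
                                   /*IsVarArg=*/true);
  std::printf("%s\n", CC == EffectiveCC::ARM_AAPCS ? "ARM_AAPCS" : "other");
  return 0;
}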
2103 
2104 CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2105  bool isVarArg) const {
2106  return CCAssignFnForNode(CC, false, isVarArg);
2107 }
2108 
2109 CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
2110  bool isVarArg) const {
2111  return CCAssignFnForNode(CC, true, isVarArg);
2112 }
2113 
2114 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2115 /// CallingConvention.
2116 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2117  bool Return,
2118  bool isVarArg) const {
2119  switch (getEffectiveCallingConv(CC, isVarArg)) {
2120  default:
2121  report_fatal_error("Unsupported calling convention");
2122  case CallingConv::ARM_APCS:
2123  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2124  case CallingConv::ARM_AAPCS:
2125  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2126  case CallingConv::ARM_AAPCS_VFP:
2127  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2128  case CallingConv::Fast:
2129  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2130  case CallingConv::GHC:
2131  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2132  case CallingConv::PreserveMost:
2133  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2134  case CallingConv::CFGuard_Check:
2135  return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2136  }
2137 }
2138 
2139 SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2140  MVT LocVT, MVT ValVT, SDValue Val) const {
2141  Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2142  Val);
2143  if (Subtarget->hasFullFP16()) {
2144  Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2145  } else {
2146  Val = DAG.getNode(ISD::TRUNCATE, dl,
2147  MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2148  Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2149  }
2150  return Val;
2151 }
2152 
2153 SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2154  MVT LocVT, MVT ValVT,
2155  SDValue Val) const {
2156  if (Subtarget->hasFullFP16()) {
2157  Val = DAG.getNode(ARMISD::VMOVrh, dl,
2158  MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2159  } else {
2160  Val = DAG.getNode(ISD::BITCAST, dl,
2161  MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2162  Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2163  MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2164  }
2165  return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2166 }
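
Without the +fullfp16 extension an f16 value is shuttled around as its raw bit pattern in the low 16 bits of a 32-bit location; MoveFromHPR widens it and MoveToHPR narrows it back. A tiny standalone sketch of that round trip (the bit pattern 0x3C00 is half-precision 1.0):

#include <cassert>
#include <cstdint>

int main() {
  uint16_t HalfBits = 0x3C00;              // IEEE-754 half-precision 1.0
  uint32_t LocBits = HalfBits;             // bitcast + zero_extend, as in MoveFromHPR
  uint16_t Recovered = uint16_t(LocBits);  // truncate + bitcast, as in MoveToHPR
  assert(Recovered == HalfBits);
  return 0;
}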
2167 
2168 /// LowerCallResult - Lower the result values of a call into the
2169 /// appropriate copies out of appropriate physical registers.
2170 SDValue ARMTargetLowering::LowerCallResult(
2171  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2172  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2173  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2174  SDValue ThisVal) const {
2175  // Assign locations to each value returned by this call.
2176  SmallVector<CCValAssign, 16> RVLocs;
2177  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2178  *DAG.getContext());
2179  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2180 
2181  // Copy all of the result registers out of their specified physreg.
2182  for (unsigned i = 0; i != RVLocs.size(); ++i) {
2183  CCValAssign VA = RVLocs[i];
2184 
2185  // Pass 'this' value directly from the argument to return value, to avoid
2186  // reg unit interference
2187  if (i == 0 && isThisReturn) {
2188  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2189  "unexpected return calling convention register assignment");
2190  InVals.push_back(ThisVal);
2191  continue;
2192  }
2193 
2194  SDValue Val;
2195  if (VA.needsCustom() &&
2196  (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2197  // Handle f64 or half of a v2f64.
2198  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2199  InFlag);
2200  Chain = Lo.getValue(1);
2201  InFlag = Lo.getValue(2);
2202  VA = RVLocs[++i]; // skip ahead to next loc
2203  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2204  InFlag);
2205  Chain = Hi.getValue(1);
2206  InFlag = Hi.getValue(2);
2207  if (!Subtarget->isLittle())
2208  std::swap (Lo, Hi);
2209  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2210 
2211  if (VA.getLocVT() == MVT::v2f64) {
2212  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2213  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2214  DAG.getConstant(0, dl, MVT::i32));
2215 
2216  VA = RVLocs[++i]; // skip ahead to next loc
2217  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2218  Chain = Lo.getValue(1);
2219  InFlag = Lo.getValue(2);
2220  VA = RVLocs[++i]; // skip ahead to next loc
2221  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2222  Chain = Hi.getValue(1);
2223  InFlag = Hi.getValue(2);
2224  if (!Subtarget->isLittle())
2225  std::swap (Lo, Hi);
2226  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2227  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2228  DAG.getConstant(1, dl, MVT::i32));
2229  }
2230  } else {
2231  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2232  InFlag);
2233  Chain = Val.getValue(1);
2234  InFlag = Val.getValue(2);
2235  }
2236 
2237  switch (VA.getLocInfo()) {
2238  default: llvm_unreachable("Unknown loc info!");
2239  case CCValAssign::Full: break;
2240  case CCValAssign::BCvt:
2241  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2242  break;
2243  }
2244 
2245  // f16 arguments have their size extended to 4 bytes and passed as if they
2246  // had been copied to the LSBs of a 32-bit register.
2247  // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2248  if (VA.needsCustom() &&
2249  (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2250  Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2251 
2252  InVals.push_back(Val);
2253  }
2254 
2255  return Chain;
2256 }
2257 
2258 std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2259  const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
2260  bool IsTailCall, int SPDiff) const {
2261  SDValue DstAddr;
2262  MachinePointerInfo DstInfo;
2263  int32_t Offset = VA.getLocMemOffset();
2264  MachineFunction &MF = DAG.getMachineFunction();
2265 
2266  if (IsTailCall) {
2267  Offset += SPDiff;
2268  auto PtrVT = getPointerTy(DAG.getDataLayout());
2269  int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2270  int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2271  DstAddr = DAG.getFrameIndex(FI, PtrVT);
2272  DstInfo =
2274  } else {
2275  SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
2276  DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2277  StackPtr, PtrOff);
2278  DstInfo =
2280  }
2281 
2282  return std::make_pair(DstAddr, DstInfo);
2283 }
2284 
2285 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2286  SDValue Chain, SDValue &Arg,
2287  RegsToPassVector &RegsToPass,
2288  CCValAssign &VA, CCValAssign &NextVA,
2289  SDValue &StackPtr,
2290  SmallVectorImpl<SDValue> &MemOpChains,
2291  bool IsTailCall,
2292  int SPDiff) const {
2293  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2294  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2295  unsigned id = Subtarget->isLittle() ? 0 : 1;
2296  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2297 
2298  if (NextVA.isRegLoc())
2299  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2300  else {
2301  assert(NextVA.isMemLoc());
2302  if (!StackPtr.getNode())
2303  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2304  getPointerTy(DAG.getDataLayout()));
2305 
2306  SDValue DstAddr;
2307  MachinePointerInfo DstInfo;
2308  std::tie(DstAddr, DstInfo) =
2309  computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2310  MemOpChains.push_back(
2311  DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2312  }
2313 }
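
VMOVRRD hands back the two 32-bit halves of the f64, and the `id` selection above decides which half lands in the first location register (low half for little-endian subtargets, high half for big-endian ones). A host-side sketch of that split, assuming a little-endian host so the memcpy picks the words out in low/high order:

#include <cstdint>
#include <cstring>
#include <utility>

// Split an f64 into the two 32-bit words VMOVRRD would produce, choosing
// which word goes into the first register the same way as the code above.
static std::pair<uint32_t, uint32_t> splitF64(double D, bool SubtargetIsLittle) {
  uint32_t Words[2];
  std::memcpy(Words, &D, sizeof(D));  // Words[0] = low word on a little-endian host
  if (!SubtargetIsLittle)
    std::swap(Words[0], Words[1]);    // big-endian subtarget: high word goes first
  return {Words[0], Words[1]};
}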
2314 
2315 static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2316  return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2317  CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
2318 }
2319 
2320 /// LowerCall - Lowering a call into a callseq_start <-
2321 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
2322 /// nodes.
2323 SDValue
2324 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2325  SmallVectorImpl<SDValue> &InVals) const {
2326  SelectionDAG &DAG = CLI.DAG;
2327  SDLoc &dl = CLI.DL;
2328  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2329  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2330  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2331  SDValue Chain = CLI.Chain;
2332  SDValue Callee = CLI.Callee;
2333  bool &isTailCall = CLI.IsTailCall;
2334  CallingConv::ID CallConv = CLI.CallConv;
2335  bool doesNotRet = CLI.DoesNotReturn;
2336  bool isVarArg = CLI.IsVarArg;
2337 
2338  MachineFunction &MF = DAG.getMachineFunction();
2339  MachineFunction::CallSiteInfo CSInfo;
2340  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2341  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2342  bool isThisReturn = false;
2343  bool isCmseNSCall = false;
2344  bool isSibCall = false;
2345  bool PreferIndirect = false;
2346  bool GuardWithBTI = false;
2347 
2348  // Lower 'returns_twice' calls to a pseudo-instruction.
2349  if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2350  !Subtarget->noBTIAtReturnTwice())
2351  GuardWithBTI = AFI->branchTargetEnforcement();
2352 
2353  // Determine whether this is a non-secure function call.
2354  if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2355  isCmseNSCall = true;
2356 
2357  // Disable tail calls if they're not supported.
2358  if (!Subtarget->supportsTailCall())
2359  isTailCall = false;
2360 
2361  // For both the non-secure calls and the returns from a CMSE entry function,
2362  // the function needs to do some extra work after the call, or before the
2363  // return, respectively, thus it cannot end with a tail call.
2364  if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2365  isTailCall = false;
2366 
2367  if (isa<GlobalAddressSDNode>(Callee)) {
2368  // If we're optimizing for minimum size and the function is called three or
2369  // more times in this block, we can improve codesize by calling indirectly
2370  // as BLXr has a 16-bit encoding.
2371  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2372  if (CLI.CB) {
2373  auto *BB = CLI.CB->getParent();
2374  PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2375  count_if(GV->users(), [&BB](const User *U) {
2376  return isa<Instruction>(U) &&
2377  cast<Instruction>(U)->getParent() == BB;
2378  }) > 2;
2379  }
2380  }
2381  if (isTailCall) {
2382  // Check if it's really possible to do a tail call.
2383  isTailCall = IsEligibleForTailCallOptimization(
2384  Callee, CallConv, isVarArg, isStructRet,
2385  MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2386  PreferIndirect);
2387 
2388  if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2389  CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2390  isSibCall = true;
2391 
2392  // We don't support GuaranteedTailCallOpt for ARM, only automatically
2393  // detected sibcalls.
2394  if (isTailCall)
2395  ++NumTailCalls;
2396  }
2397 
2398  if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2399  report_fatal_error("failed to perform tail call elimination on a call "
2400  "site marked musttail");
2401  // Analyze operands of the call, assigning locations to each operand.
2402  SmallVector<CCValAssign, 16> ArgLocs;
2403  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2404  *DAG.getContext());
2405  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2406 
2407  // Get a count of how many bytes are to be pushed on the stack.
2408  unsigned NumBytes = CCInfo.getNextStackOffset();
2409 
2410  // SPDiff is the byte offset of the call's argument area from the callee's.
2411  // Stores to callee stack arguments will be placed in FixedStackSlots offset
2412  // by this amount for a tail call. In a sibling call it must be 0 because the
2413  // caller will deallocate the entire stack and the callee still expects its
2414  // arguments to begin at SP+0. Completely unused for non-tail calls.
2415  int SPDiff = 0;
2416 
2417  if (isTailCall && !isSibCall) {
2418  auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2419  unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2420 
2421  // Since callee will pop argument stack as a tail call, we must keep the
2422  // popped size 16-byte aligned.
2424  NumBytes = alignTo(NumBytes, StackAlign);
2425 
2426  // SPDiff will be negative if this tail call requires more space than we
2427  // would automatically have in our incoming argument space. Positive if we
2428  // can actually shrink the stack.
2429  SPDiff = NumReusableBytes - NumBytes;
2430 
2431  // If this call requires more stack than we have available from
2432  // LowerFormalArguments, tell FrameLowering to reserve space for it.
2433  if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2434  AFI->setArgRegsSaveSize(-SPDiff);
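  // Worked example with illustrative numbers: if the caller reserved 8 bytes
  // of incoming argument space (NumReusableBytes == 8) and this tail call
  // needs 20 bytes of outgoing arguments, NumBytes is rounded up to 32, so
  // SPDiff == 8 - 32 == -24 and 24 bytes are reserved here (unless a larger
  // save area was already requested).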
2435  }
2436 
2437  if (isSibCall) {
2438  // For sibling tail calls, memory operands are available in our caller's stack.
2439  NumBytes = 0;
2440  } else {
2441  // Adjust the stack pointer for the new arguments...
2442  // These operations are automatically eliminated by the prolog/epilog pass
2443  Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2444  }
2445 
2446  SDValue StackPtr =
2447  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2448 
2449  RegsToPassVector RegsToPass;
2450  SmallVector<SDValue, 8> MemOpChains;
2451 
2452  // During a tail call, stores to the argument area must happen after all of
2453  // the function's incoming arguments have been loaded because they may alias.
2454  // This is done by folding in a TokenFactor from LowerFormalArguments, but
2455  // there's no point in doing so repeatedly so this tracks whether that's
2456  // happened yet.
2457  bool AfterFormalArgLoads = false;
2458 
2459  // Walk the register/memloc assignments, inserting copies/loads. In the case
2460  // of tail call optimization, arguments are handled later.
2461  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2462  i != e;
2463  ++i, ++realArgIdx) {
2464  CCValAssign &VA = ArgLocs[i];
2465  SDValue Arg = OutVals[realArgIdx];
2466  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2467  bool isByVal = Flags.isByVal();
2468 
2469  // Promote the value if needed.
2470  switch (VA.getLocInfo()) {
2471  default: llvm_unreachable("Unknown loc info!");
2472  case CCValAssign::Full: break;
2473  case CCValAssign::SExt:
2474  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2475  break;
2476  case CCValAssign::ZExt:
2477  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2478  break;
2479  case CCValAssign::AExt:
2480  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2481  break;
2482  case CCValAssign::BCvt:
2483  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2484  break;
2485  }
2486 
2487  if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2488  Chain = DAG.getStackArgumentTokenFactor(Chain);
2489  AfterFormalArgLoads = true;
2490  }
2491 
2492  // f16 arguments have their size extended to 4 bytes and passed as if they
2493  // had been copied to the LSBs of a 32-bit register.
2494  // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2495  if (VA.needsCustom() &&
2496  (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2497  Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2498  } else {
2499  // f16 arguments could have been extended prior to argument lowering.
2500  // Mask such arguments if this is a CMSE nonsecure call.
2501  auto ArgVT = Outs[realArgIdx].ArgVT;
2502  if (isCmseNSCall && (ArgVT == MVT::f16)) {
2503  auto LocBits = VA.getLocVT().getSizeInBits();
2504  auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2505  SDValue Mask =
2506  DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2507  Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2508  Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2509  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2510  }
2511  }
2512 
2513  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2514  if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2516  DAG.getConstant(0, dl, MVT::i32));
2518  DAG.getConstant(1, dl, MVT::i32));
2519 
2520  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2521  StackPtr, MemOpChains, isTailCall, SPDiff);
2522 
2523  VA = ArgLocs[++i]; // skip ahead to next loc
2524  if (VA.isRegLoc()) {
2525  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2526  StackPtr, MemOpChains, isTailCall, SPDiff);
2527  } else {
2528  assert(VA.isMemLoc());
2529  SDValue DstAddr;
2530  MachinePointerInfo DstInfo;
2531  std::tie(DstAddr, DstInfo) =
2532  computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2533  MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2534  }
2535  } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2536  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2537  StackPtr, MemOpChains, isTailCall, SPDiff);
2538  } else if (VA.isRegLoc()) {
2539  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2540  Outs[0].VT == MVT::i32) {
2541  assert(VA.getLocVT() == MVT::i32 &&
2542  "unexpected calling convention register assignment");
2543  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2544  "unexpected use of 'returned'");
2545  isThisReturn = true;
2546  }
2547  const TargetOptions &Options = DAG.getTarget().Options;
2548  if (Options.EmitCallSiteInfo)
2549  CSInfo.emplace_back(VA.getLocReg(), i);
2550  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2551  } else if (isByVal) {
2552  assert(VA.isMemLoc());
2553  unsigned offset = 0;
2554 
2555  // True if this byval aggregate will be split between registers
2556  // and memory.
2557  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2558  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2559 
2560  if (CurByValIdx < ByValArgsCount) {
2561 
2562  unsigned RegBegin, RegEnd;
2563  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2564 
2565  EVT PtrVT =
2567  unsigned int i, j;
2568  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2569  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2570  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2571  SDValue Load =
2572  DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2573  DAG.InferPtrAlign(AddArg));
2574  MemOpChains.push_back(Load.getValue(1));
2575  RegsToPass.push_back(std::make_pair(j, Load));
2576  }
2577 
2578  // If the parameter size exceeds the register area, the "offset" value
2579  // helps us calculate the stack slot for the remaining part properly.
2580  offset = RegEnd - RegBegin;
2581 
2582  CCInfo.nextInRegsParam();
2583  }
2584 
2585  if (Flags.getByValSize() > 4*offset) {
2586  auto PtrVT = getPointerTy(DAG.getDataLayout());
2587  SDValue Dst;
2588  MachinePointerInfo DstInfo;
2589  std::tie(Dst, DstInfo) =
2590  computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2591  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2592  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2593  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2594  MVT::i32);
2595  SDValue AlignNode =
2596  DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2597 
2598  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2599  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2600  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2601  Ops));
2602  }
2603  } else {
2604  assert(VA.isMemLoc());
2605  SDValue DstAddr;
2606  MachinePointerInfo DstInfo;
2607  std::tie(DstAddr, DstInfo) =
2608  computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2609 
2610  SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2611  MemOpChains.push_back(Store);
2612  }
2613  }
2614 
2615  if (!MemOpChains.empty())
2616  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2617 
2618  // Build a sequence of copy-to-reg nodes chained together with token chain
2619  // and flag operands which copy the outgoing args into the appropriate regs.
2620  SDValue InFlag;
2621  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2622  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2623  RegsToPass[i].second, InFlag);
2624  InFlag = Chain.getValue(1);
2625  }
2626 
2627  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2628  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2629  // node so that legalize doesn't hack it.
2630  bool isDirect = false;
2631 
2632  const TargetMachine &TM = getTargetMachine();
2633  const Module *Mod = MF.getFunction().getParent();
2634  const GlobalValue *GVal = nullptr;
2635  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2636  GVal = G->getGlobal();
2637  bool isStub =
2638  !TM.shouldAssumeDSOLocal(*Mod, GVal) && Subtarget->isTargetMachO();
2639 
2640  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2641  bool isLocalARMFunc = false;
2642  auto PtrVt = getPointerTy(DAG.getDataLayout());
2643 
2644  if (Subtarget->genLongCalls()) {
2645  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2646  "long-calls codegen is not position independent!");
2647  // Handle a global address or an external symbol. If it's not one of
2648  // those, the target's already in a register, so we don't need to do
2649  // anything extra.
2650  if (isa<GlobalAddressSDNode>(Callee)) {
2651  // When generating execute-only code we use a movw/movt pair.
2652  // Currently execute-only is only available for architectures that
2653  // support movw/movt, so we are safe to assume that.
2654  if (Subtarget->genExecuteOnly()) {
2655  assert(Subtarget->useMovt() &&
2656  "long-calls with execute-only requires movt and movw!");
2657  ++NumMovwMovt;
2658  Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2659  DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2660  } else {
2661  // Create a constant pool entry for the callee address
2662  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2664  GVal, ARMPCLabelIndex, ARMCP::CPValue, 0);
2665 
2666  // Get the address of the callee into a register
2667  SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2668  Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2669  Callee = DAG.getLoad(
2670  PtrVt, dl, DAG.getEntryNode(), Addr,
2672  }
2673  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2674  const char *Sym = S->getSymbol();
2675 
2676  // When generating execute-only code we use a movw/movt pair.
2677  // Currently execute-only is only available for architectures that
2678  // support movw/movt, so we are safe to assume that.
2679  if (Subtarget->genExecuteOnly()) {
2680  assert(Subtarget->useMovt() &&
2681  "long-calls with execute-only requires movt and movw!");
2682  ++NumMovwMovt;
2683  Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2684  DAG.getTargetExternalSymbol(Sym, PtrVt));
2685  } else {
2686  // Create a constant pool entry for the callee address
2687  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2689  *DAG.getContext(), Sym, ARMPCLabelIndex, 0);
2690 
2691  // Get the address of the callee into a register
2692  SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2693  Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2694  Callee = DAG.getLoad(
2695  PtrVt, dl, DAG.getEntryNode(), Addr,
2697  }
2698  }
2699  } else if (isa<GlobalAddressSDNode>(Callee)) {
2700  if (!PreferIndirect) {
2701  isDirect = true;
2702  bool isDef = GVal->isStrongDefinitionForLinker();
2703 
2704  // ARM call to a local ARM function is predicable.
2705  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2706  // tBX takes a register source operand.
2707  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2708  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2709  Callee = DAG.getNode(
2710  ARMISD::WrapperPIC, dl, PtrVt,
2711  DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2712  Callee = DAG.getLoad(
2713  PtrVt, dl, DAG.getEntryNode(), Callee,
2717  } else if (Subtarget->isTargetCOFF()) {
2718  assert(Subtarget->isTargetWindows() &&
2719  "Windows is the only supported COFF target");
2720  unsigned TargetFlags = ARMII::MO_NO_FLAG;
2721  if (GVal->hasDLLImportStorageClass())
2722  TargetFlags = ARMII::MO_DLLIMPORT;
2723  else if (!TM.shouldAssumeDSOLocal(*GVal->getParent(), GVal))
2724  TargetFlags = ARMII::MO_COFFSTUB;
2725  Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0,
2726  TargetFlags);
2727  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2728  Callee =
2729  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2730  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2732  } else {
2733  Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0);
2734  }
2735  }
2736  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2737  isDirect = true;
2738  // tBX takes a register source operand.
2739  const char *Sym = S->getSymbol();
2740  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2741  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2742  ARMConstantPoolValue *CPV =
2744  ARMPCLabelIndex, 4);
2745  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2746  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2747  Callee = DAG.getLoad(
2748  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2750  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2751  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2752  } else {
2753  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2754  }
2755  }
2756 
2757  if (isCmseNSCall) {
2758  assert(!isARMFunc && !isDirect &&
2759  "Cannot handle call to ARM function or direct call");
2760  if (NumBytes > 0) {
2762  "call to non-secure function would "
2763  "require passing arguments on stack",
2764  dl.getDebugLoc());
2765  DAG.getContext()->diagnose(Diag);
2766  }
2767  if (isStructRet) {
2770  "call to non-secure function would return value through pointer",
2771  dl.getDebugLoc());
2772  DAG.getContext()->diagnose(Diag);
2773  }
2774  }
2775 
2776  // FIXME: handle tail calls differently.
2777  unsigned CallOpc;
2778  if (Subtarget->isThumb()) {
2779  if (GuardWithBTI)
2780  CallOpc = ARMISD::t2CALL_BTI;
2781  else if (isCmseNSCall)
2782  CallOpc = ARMISD::tSECALL;
2783  else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2784  CallOpc = ARMISD::CALL_NOLINK;
2785  else
2786  CallOpc = ARMISD::CALL;
2787  } else {
2788  if (!isDirect && !Subtarget->hasV5TOps())
2789  CallOpc = ARMISD::CALL_NOLINK;
2790  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2791  // Emit regular call when code size is the priority
2792  !Subtarget->hasMinSize())
2793  // "mov lr, pc; b _foo" to avoid confusing the RSP
2794  CallOpc = ARMISD::CALL_NOLINK;
2795  else
2796  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2797  }
2798 
2799  // We don't usually want to end the call-sequence here because we would tidy
2800  // the frame up *after* the call, however in the ABI-changing tail-call case
2801  // we've carefully laid out the parameters so that when sp is reset they'll be
2802  // in the correct location.
2803  if (isTailCall && !isSibCall) {
2804  Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InFlag, dl);
2805  InFlag = Chain.getValue(1);
2806  }
2807 
2808  std::vector<SDValue> Ops;
2809  Ops.push_back(Chain);
2810  Ops.push_back(Callee);
2811 
2812  if (isTailCall) {
2813  Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32));
2814  }
2815 
2816  // Add argument registers to the end of the list so that they are known live
2817  // into the call.
2818  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2819  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2820  RegsToPass[i].second.getValueType()));
2821 
2822  // Add a register mask operand representing the call-preserved registers.
2823  const uint32_t *Mask;
2824  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2825  if (isThisReturn) {
2826  // For 'this' returns, use the R0-preserving mask if applicable
2827  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2828  if (!Mask) {
2829  // Set isThisReturn to false if the calling convention is not one that
2830  // allows 'returned' to be modeled in this way, so LowerCallResult does
2831  // not try to pass 'this' straight through
2832  isThisReturn = false;
2833  Mask = ARI->getCallPreservedMask(MF, CallConv);
2834  }
2835  } else
2836  Mask = ARI->getCallPreservedMask(MF, CallConv);
2837 
2838  assert(Mask && "Missing call preserved mask for calling convention");
2839  Ops.push_back(DAG.getRegisterMask(Mask));
2840 
2841  if (InFlag.getNode())
2842  Ops.push_back(InFlag);
2843 
2844  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2845  if (isTailCall) {
2847  SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2848  DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2849  return Ret;
2850  }
2851 
2852  // Returns a chain and a flag for retval copy to use.
2853  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2854  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2855  InFlag = Chain.getValue(1);
2856  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2857 
2858  // If we're guaranteeing tail-calls will be honoured, the callee must
2859  // pop its own argument stack on return. But this call is *not* a tail call so
2860  // we need to undo that after it returns to restore the status-quo.
2861  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
2862  uint64_t CalleePopBytes =
2863  canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;
2864 
2865  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InFlag, dl);
2866  if (!Ins.empty())
2867  InFlag = Chain.getValue(1);
2868 
2869  // Handle result values, copying them out of physregs into vregs that we
2870  // return.
2871  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2872  InVals, isThisReturn,
2873  isThisReturn ? OutVals[0] : SDValue());
2874 }
2875 
2876 /// HandleByVal - Every parameter *after* a byval parameter is passed
2877 /// on the stack. Remember the next parameter register to allocate,
2878 /// and then confiscate the rest of the parameter registers to ensure
2879 /// this.
2880 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2881  Align Alignment) const {
2882  // Byval (as with any stack) slots are always at least 4 byte aligned.
2883  Alignment = std::max(Alignment, Align(4));
2884 
2885  unsigned Reg = State->AllocateReg(GPRArgRegs);
2886  if (!Reg)
2887  return;
2888 
2889  unsigned AlignInRegs = Alignment.value() / 4;
2890  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2891  for (unsigned i = 0; i < Waste; ++i)
2892  Reg = State->AllocateReg(GPRArgRegs);
2893 
2894  if (!Reg)
2895  return;
2896 
2897  unsigned Excess = 4 * (ARM::R4 - Reg);
2898 
2899  // Special case when NSAA != SP and the parameter size is greater than the
2900  // size of all remaining GPR regs. In that case we can't split the parameter;
2901  // we must send it to the stack. We also must set the NCRN to R4, so all
2902  // remaining registers are wasted.
2903  const unsigned NSAAOffset = State->getNextStackOffset();
2904  if (NSAAOffset != 0 && Size > Excess) {
2905  while (State->AllocateReg(GPRArgRegs))
2906  ;
2907  return;
2908  }
2909 
2910  // The first register for the byval parameter is the first register that
2911  // wasn't allocated before this method call, so it would be "reg".
2912  // If the parameter is small enough to be saved in the range [reg, r4), then
2913  // the end (first after last) register would be reg + param-size-in-regs;
2914  // otherwise the parameter is split between registers and stack, and the
2915  // end register would be r4 in that case.
2916  unsigned ByValRegBegin = Reg;
2917  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2918  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2919  // Note: the first register was already allocated at the beginning of this
2920  // function, so allocate the remaining registers we need.
2921  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2922  State->AllocateReg(GPRArgRegs);
2923  // A byval parameter that is split between registers and memory needs its
2924  // size truncated here.
2925  // In the case where the entire structure fits in registers, we set the
2926  // size in memory to zero.
2927  Size = std::max<int>(Size - Excess, 0);
2928 }
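
The net effect is an all-integer register/stack split. The sketch below mirrors that arithmetic with plain integers, numbering r0-r3 as 0-3 (it ignores the alignment padding and the NSAA special case handled above):

#include <algorithm>
#include <cstdio>

// Plain-integer mirror of the register/stack split performed above.
// Registers are numbered 0..3 for r0..r3; 4 stands for "no register left".
struct ByValSplit {
  unsigned FirstReg;   // first GPR used for the byval piece
  unsigned EndReg;     // one past the last GPR used
  unsigned StackBytes; // bytes that still go on the stack
};

static ByValSplit splitByVal(unsigned FirstFreeReg, unsigned SizeInBytes) {
  unsigned Excess = 4 * (4 - FirstFreeReg);          // bytes that fit in r0-r3
  unsigned EndReg = std::min(FirstFreeReg + SizeInBytes / 4, 4u);
  unsigned StackBytes = SizeInBytes > Excess ? SizeInBytes - Excess : 0;
  return {FirstFreeReg, EndReg, StackBytes};
}

int main() {
  // A 24-byte struct when r1 is the next free register: r1-r3 carry 12 bytes,
  // the remaining 12 bytes are passed on the stack.
  ByValSplit S = splitByVal(/*FirstFreeReg=*/1, /*SizeInBytes=*/24);
  std::printf("regs r%u..r%u, %u bytes on stack\n", S.FirstReg, S.EndReg - 1,
              S.StackBytes);
  return 0;
}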
2929 
2930 /// MatchingStackOffset - Return true if the given stack call argument is
2931 /// already available in the same position (relatively) of the caller's
2932 /// incoming argument stack.
2933 static
2934 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2936  const TargetInstrInfo *TII) {
2937  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2938  int FI = std::numeric_limits<int>::max();
2939  if (Arg.getOpcode() == ISD::CopyFromReg) {
2940  Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2941  if (!Register::isVirtualRegister(VR))
2942  return false;
2943  MachineInstr *Def = MRI->getVRegDef(VR);
2944  if (!Def)
2945  return false;
2946  if (!Flags.isByVal()) {
2947  if (!TII->isLoadFromStackSlot(*Def, FI))
2948  return false;
2949  } else {
2950  return false;
2951  }
2952  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2953  if (Flags.isByVal())
2954  // ByVal argument is passed in as a pointer but it's now being
2955  // dereferenced. e.g.
2956  // define @foo(%struct.X* %A) {
2957  // tail call @bar(%struct.X* byval %A)
2958  // }
2959  return false;
2960  SDValue Ptr = Ld->getBasePtr();
2961  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2962  if (!FINode)
2963  return false;
2964  FI = FINode->getIndex();
2965  } else
2966  return false;
2967 
2969  if (!MFI.isFixedObjectIndex(FI))
2970  return false;
2971  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2972 }
2973 
2974 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2975 /// for tail call optimization. Targets which want to do tail call
2976 /// optimization should implement this function.
2977 bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2978  SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2979  bool isCalleeStructRet, bool isCallerStructRet,
2980  const SmallVectorImpl<ISD::OutputArg> &Outs,
2981  const SmallVectorImpl<SDValue> &OutVals,
2983  const bool isIndirect) const {
2984  MachineFunction &MF = DAG.getMachineFunction();
2985  const Function &CallerF = MF.getFunction();
2986  CallingConv::ID CallerCC = CallerF.getCallingConv();
2987 
2988  assert(Subtarget->supportsTailCall());
2989 
2990  // Indirect tail calls cannot be optimized for Thumb1 if the args
2991  // to the call take up r0-r3. The reason is that there are no legal registers
2992  // left to hold the pointer to the function to be called.
2993  // Similarly, if the function uses return address sign and authentication,
2994  // r12 is needed to hold the PAC and is not available to hold the callee
2995  // address.
2996  if (Outs.size() >= 4 &&
2997  (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
2998  if (Subtarget->isThumb1Only())
2999  return false;
3000  // Conservatively assume the function spills LR.
3002  return false;
3003  }
3004 
3005  // Look for obvious safe cases to perform tail call optimization that do not
3006  // require ABI changes. This is what gcc calls sibcall.
3007 
3008  // Exception-handling functions need a special set of instructions to indicate
3009  // a return to the hardware. Tail-calling another function would probably
3010  // break this.
3011  if (CallerF.hasFnAttribute("interrupt"))
3012  return false;
3013 
3014  if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
3015  return CalleeCC == CallerCC;
3016 
3017  // Also avoid sibcall optimization if either caller or callee uses struct
3018  // return semantics.
3019  if (isCalleeStructRet || isCallerStructRet)
3020  return false;
3021 
3022  // Externally-defined functions with weak linkage should not be
3023  // tail-called on ARM when the OS does not support dynamic
3024  // pre-emption of symbols, as the AAELF spec requires normal calls
3025  // to undefined weak functions to be replaced with a NOP or jump to the
3026  // next instruction. The behaviour of branch instructions in this
3027  // situation (as used for tail calls) is implementation-defined, so we
3028  // cannot rely on the linker replacing the tail call with a return.
3029  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3030  const GlobalValue *GV = G->getGlobal();
3032  if (GV->hasExternalWeakLinkage() &&
3033  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3034  return false;
3035  }
3036 
3037  // Check that the call results are passed in the same way.
3038  LLVMContext &C = *DAG.getContext();
3040  getEffectiveCallingConv(CalleeCC, isVarArg),
3041  getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
3042  CCAssignFnForReturn(CalleeCC, isVarArg),
3043  CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
3044  return false;
3045  // The callee has to preserve all registers the caller needs to preserve.
3046  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3047  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3048  if (CalleeCC != CallerCC) {
3049  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3050  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3051  return false;
3052  }
3053 
3054  // If Caller's vararg or byval argument has been split between registers and
3055  // stack, do not perform tail call, since part of the argument is in caller's
3056  // local frame.
3057  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3058  if (AFI_Caller->getArgRegsSaveSize())
3059  return false;
3060 
3061  // If the callee takes no arguments then go on to check the results of the
3062  // call.
3063  if (!Outs.empty()) {
3064  // Check if stack adjustment is needed. For now, do not do this if any
3065  // argument is passed on the stack.
3066  SmallVector<CCValAssign, 16> ArgLocs;
3067  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3068  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3069  if (CCInfo.getNextStackOffset()) {
3070  // Check if the arguments are already laid out in the right way as
3071  // the caller's fixed stack objects.
3072  MachineFrameInfo &MFI = MF.getFrameInfo();
3073  const MachineRegisterInfo *MRI = &MF.getRegInfo();
3074  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3075  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
3076  i != e;
3077  ++i, ++realArgIdx) {
3078  CCValAssign &VA = ArgLocs[i];
3079  EVT RegVT = VA.getLocVT();
3080  SDValue Arg = OutVals[realArgIdx];
3081  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3082  if (VA.getLocInfo() == CCValAssign::Indirect)
3083  return false;
3084  if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
3085  // f64 and vector types are split into multiple registers or
3086  // register/stack-slot combinations. The types will not match
3087  // the registers; give up on memory f64 refs until we figure
3088  // out what to do about this.
3089  if (!VA.isRegLoc())
3090  return false;
3091  if (!ArgLocs[++i].isRegLoc())
3092  return false;
3093  if (RegVT == MVT::v2f64) {
3094  if (!ArgLocs[++i].isRegLoc())
3095  return false;
3096  if (!ArgLocs[++i].isRegLoc())
3097  return false;
3098  }
3099  } else if (!VA.isRegLoc()) {
3100  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
3101  MFI, MRI, TII))
3102  return false;
3103  }
3104  }
3105  }
3106 
3107  const MachineRegisterInfo &MRI = MF.getRegInfo();
3108  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3109  return false;
3110  }
3111 
3112  return true;
3113 }
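
At the source level the "obvious safe cases" are ordinary sibling calls: the caller simply forwards to another function with a compatible convention, no struct return, and nothing passed on the stack. For example:

int callee(int X);

// All arguments fit in registers and the conventions match, so this call is a
// sibcall candidate and can be emitted as a plain branch ("b callee") instead
// of "bl callee" followed by a return. Whether that happens is decided by the
// checks above.
int caller(int X) { return callee(X + 1); }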
3114 
3115 bool
3116 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3117  MachineFunction &MF, bool isVarArg,
3118  const SmallVectorImpl<ISD::OutputArg> &Outs,
3119  LLVMContext &Context) const {
3120  SmallVector<CCValAssign, 16> RVLocs;
3121  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3122  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3123 }
3124 
3126  const SDLoc &DL, SelectionDAG &DAG) {
3127  const MachineFunction &MF = DAG.getMachineFunction();
3128  const Function &F = MF.getFunction();
3129 
3130  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3131 
3132  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3133  // version of the "preferred return address". These offsets affect the return
3134  // instruction if this is a return from PL1 without hypervisor extensions.
3135  // IRQ/FIQ: +4 "subs pc, lr, #4"
3136  // SWI: 0 "subs pc, lr, #0"
3137  // ABORT: +4 "subs pc, lr, #4"
3138  // UNDEF: +4/+2 "subs pc, lr, #0"
3139  // UNDEF varies depending on whether the exception came from ARM or Thumb
3140  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
3141 
3142  int64_t LROffset;
3143  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3144  IntKind == "ABORT")
3145  LROffset = 4;
3146  else if (IntKind == "SWI" || IntKind == "UNDEF")
3147  LROffset = 0;
3148  else
3149  report_fatal_error("Unsupported interrupt attribute. If present, value "
3150  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3151 
3152  RetOps.insert(RetOps.begin() + 1,
3153  DAG.getConstant(LROffset, DL, MVT::i32, false));
3154 
3155  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
3156 }
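
The "interrupt" attribute that drives this is normally attached at the source level; with Clang/GCC on ARM a handler is written roughly like the sketch below (GNU attribute syntax, IRQ flavour assumed), and returns with "subs pc, lr, #4" instead of "bx lr":

volatile int IrqCount;

// Hypothetical bare-metal IRQ handler; assumes an ARM target where the GNU
// interrupt attribute is available.
__attribute__((interrupt("IRQ"))) void irq_handler(void) {
  ++IrqCount;
}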
3157 
3158 SDValue
3159 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3160  bool isVarArg,
3161  const SmallVectorImpl<ISD::OutputArg> &Outs,
3162  const SmallVectorImpl<SDValue> &OutVals,
3163  const SDLoc &dl, SelectionDAG &DAG) const {
3164  // CCValAssign - represent the assignment of the return value to a location.
3165  SmallVector<CCValAssign, 16> RVLocs;
3166 
3167  // CCState - Info about the registers and stack slots.
3168  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3169  *DAG.getContext());
3170 
3171  // Analyze outgoing return values.
3172  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3173 
3174  SDValue Flag;
3175  SmallVector<SDValue, 4> RetOps;
3176  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3177  bool isLittleEndian = Subtarget->isLittle();
3178 
3179  MachineFunction &MF = DAG.getMachineFunction();
3180  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3181  AFI->setReturnRegsCount(RVLocs.size());
3182 
3183  // Report error if cmse entry function returns structure through first ptr arg.
3184  if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3185  // Note: using an empty SDLoc(), as the first line of the function is a
3186  // better place to report than the last line.
3189  "secure entry function would return value through pointer",
3190  SDLoc().getDebugLoc());
3191  DAG.getContext()->diagnose(Diag);
3192  }
3193 
3194  // Copy the result values into the output registers.
3195  for (unsigned i = 0, realRVLocIdx = 0;
3196  i != RVLocs.size();
3197  ++i, ++realRVLocIdx) {
3198  CCValAssign &VA = RVLocs[i];
3199  assert(VA.isRegLoc() && "Can only return in registers!");
3200 
3201  SDValue Arg = OutVals[realRVLocIdx];
3202  bool ReturnF16 = false;
3203 
3204  if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3205  // Half-precision return values can be returned like this:
3206  //
3207  // t11 f16 = fadd ...
3208  // t12: i16 = bitcast t11
3209  // t13: i32 = zero_extend t12
3210  // t14: f32 = bitcast t13 <~~~~~~~ Arg
3211  //
3212  // to avoid code generation for bitcasts, we simply set Arg to the node
3213  // that produces the f16 value, t11 in this case.
3214  //
3215  if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3216  SDValue ZE = Arg.getOperand(0);
3217  if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3218  SDValue BC = ZE.getOperand(0);
3219  if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3220  Arg = BC.getOperand(0);
3221  ReturnF16 = true;
3222  }
3223  }
3224  }
3225  }
3226 
3227  switch (VA.getLocInfo()) {
3228  default: llvm_unreachable("Unknown loc info!");
3229  case CCValAssign::Full: break;
3230  case CCValAssign::BCvt:
3231  if (!ReturnF16)
3232  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3233  break;
3234  }
3235 
3236  // Mask f16 arguments if this is a CMSE nonsecure entry.
3237  auto RetVT = Outs[realRVLocIdx].ArgVT;
3238  if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3239  if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3240  Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3241  } else {
3242  auto LocBits = VA.getLocVT().getSizeInBits();
3243  auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3244  SDValue Mask =
3245  DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3246  Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3247  Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3248  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3249  }
3250  }
3251 
3252  if (VA.needsCustom() &&
3253  (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3254  if (VA.getLocVT() == MVT::v2f64) {
3255  // Extract the first half and return it in two registers.
3257  DAG.getConstant(0, dl, MVT::i32));
3258  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3259  DAG.getVTList(MVT::i32, MVT::i32), Half);
3260 
3261  Chain =
3262  DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3263  HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag);
3264  Flag = Chain.getValue(1);
3265  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3266  VA = RVLocs[++i]; // skip ahead to next loc
3267  Chain =
3268  DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3269  HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag);
3270  Flag = Chain.getValue(1);
3271  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3272  VA = RVLocs[++i]; // skip ahead to next loc
3273 
3274  // Extract the 2nd half and fall through to handle it as an f64 value.
3276  DAG.getConstant(1, dl, MVT::i32));
3277  }
3278  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3279  // available.
3280  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3281  DAG.getVTList(MVT::i32, MVT::i32), Arg);
3282  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3283  fmrrd.getValue(isLittleEndian ? 0 : 1), Flag);
3284  Flag = Chain.getValue(1);
3285  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3286  VA = RVLocs[++i]; // skip ahead to next loc
3287  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3288  fmrrd.getValue(isLittleEndian ? 1 : 0), Flag);
3289  } else
3290  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
3291 
3292  // Guarantee that all emitted copies are
3293  // stuck together, avoiding something bad.
3294  Flag = Chain.getValue(1);
3295  RetOps.push_back(DAG.getRegister(
3296  VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3297  }
3298  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3299  const MCPhysReg *I =
3300  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3301  if (I) {
3302  for (; *I; ++I) {
3303  if (ARM::GPRRegClass.contains(*I))
3304  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3305  else if (ARM::DPRRegClass.contains(*I))
3306  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3307  else
3308  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3309  }
3310  }
3311 
3312  // Update chain and glue.
3313  RetOps[0] = Chain;
3314  if (Flag.getNode())
3315  RetOps.push_back(Flag);
3316 
3317  // CPUs which aren't M-class use a special sequence to return from
3318  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3319  // though we use "subs pc, lr, #N").
3320  //
3321  // M-class CPUs actually use a normal return sequence with a special
3322  // (hardware-provided) value in LR, so the normal code path works.
3323  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3324  !Subtarget->isMClass()) {
3325  if (Subtarget->isThumb1Only())
3326  report_fatal_error("interrupt attribute is not supported in Thumb1");
3327  return LowerInterruptReturn(RetOps, dl, DAG);
3328  }
3329 
3332  return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3333 }
3334 
3335 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3336  if (N->getNumValues() != 1)
3337  return false;
3338  if (!N->hasNUsesOfValue(1, 0))
3339  return false;
3340 
3341  SDValue TCChain = Chain;
3342  SDNode *Copy = *N->use_begin();
3343  if (Copy->getOpcode() == ISD::CopyToReg) {
3344  // If the copy has a glue operand, we conservatively assume it isn't safe to
3345  // perform a tail call.
3346  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3347  return false;
3348  TCChain = Copy->getOperand(0);
3349  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3350  SDNode *VMov = Copy;
3351  // f64 returned in a pair of GPRs.
3353  for (SDNode *U : VMov->uses()) {
3354  if (U->getOpcode() != ISD::CopyToReg)
3355  return false;
3356  Copies.insert(U);
3357  }
3358  if (Copies.size() > 2)
3359  return false;
3360 
3361  for (SDNode *U : VMov->uses()) {
3362  SDValue UseChain = U->getOperand(0);
3363  if (Copies.count(UseChain.getNode()))
3364  // Second CopyToReg
3365  Copy = U;
3366  else {
3367  // We are at the top of this chain.
3368  // If the copy has a glue operand, we conservatively assume it
3369  // isn't safe to perform a tail call.
3370  if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3371  return false;
3372  // First CopyToReg
3373  TCChain = UseChain;
3374  }
3375  }
3376  } else if (Copy->getOpcode() == ISD::BITCAST) {
3377  // f32 returned in a single GPR.
3378  if (!Copy->hasOneUse())
3379  return false;
3380  Copy = *Copy->use_begin();
3381  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3382  return false;
3383  // If the copy has a glue operand, we conservatively assume it isn't safe to
3384  // perform a tail call.
3385  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3386  return false;
3387  TCChain = Copy->getOperand(0);
3388  } else {
3389  return false;
3390  }
3391 
3392  bool HasRet = false;
3393  for (const SDNode *U : Copy->uses()) {
3394  if (U->getOpcode() != ARMISD::RET_FLAG &&
3395  U->getOpcode() != ARMISD::INTRET_FLAG)
3396  return false;
3397  HasRet = true;
3398  }
3399 
3400  if (!HasRet)
3401  return false;
3402 
3403  Chain = TCChain;
3404  return true;
3405 }
3406 
3407 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3408  if (!Subtarget->supportsTailCall())
3409  return false;
3410 
3411  if (!CI->isTailCall())
3412  return false;
3413 
3414  return true;
3415 }
3416 
3417 // Writing a 64-bit value requires splitting it into two 32-bit values first,
3418 // then passing the low and high parts through.
3420  SDLoc DL(Op);
3421  SDValue WriteValue = Op->getOperand(2);
3422 
3423  // This function is only supposed to be called for i64 type argument.
3424  assert(WriteValue.getValueType() == MVT::i64
3425  && "LowerWRITE_REGISTER called for non-i64 type argument.");
3426 
3427  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3428  DAG.getConstant(0, DL, MVT::i32));
3429  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3430  DAG.getConstant(1, DL, MVT::i32));
3431  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3432  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3433 }
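
The two EXTRACT_ELEMENT nodes are just the low and high 32-bit halves of the value being written; the equivalent scalar split is:

#include <cstdint>

// Element 0 of the i64 is its low 32 bits, element 1 the high 32 bits.
static void splitWriteValue(uint64_t V, uint32_t &Lo, uint32_t &Hi) {
  Lo = static_cast<uint32_t>(V);        // EXTRACT_ELEMENT ..., 0
  Hi = static_cast<uint32_t>(V >> 32);  // EXTRACT_ELEMENT ..., 1
}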
3434 
3435 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3436 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3437 // one of the above-mentioned nodes. It has to be wrapped because otherwise
3438 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3439 // be used to form addressing modes. These wrapped nodes will be selected
3440 // into MOVi.
3441 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3442  SelectionDAG &DAG) const {
3443  EVT PtrVT = Op.getValueType();
3444  // FIXME there is no actual debug info here
3445  SDLoc dl(Op);
3446  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3447  SDValue Res;
3448 
3449  // When generating execute-only code Constant Pools must be promoted to the
3450  // global data section. It's a bit ugly that we can't share them across basic
3451  // blocks, but this way we guarantee that execute-only behaves correctly with
3452  // position-independent addressing modes.
3453  if (Subtarget->genExecuteOnly()) {
3454  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3455  auto T = const_cast<Type*>(CP->getType());
3456  auto C = const_cast<Constant*>(CP->getConstVal());
3457  auto M = const_cast<Module*>(DAG.getMachineFunction().
3458  getFunction().getParent());
3459  auto GV = new GlobalVariable(
3460  *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3461  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
3462  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
3463  Twine(AFI->createPICLabelUId())
3464  );
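  // With the default ELF private prefix ".L", a function numbered 3 and a PIC
  // label id of 7 (illustrative numbers only), the promoted constant ends up
  // in a global named ".LCP3_7".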
3465  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3466  dl, PtrVT);
3467  return LowerGlobalAddress(GA, DAG);
3468  }
3469 
3470  // The 16-bit ADR instruction can only encode offsets that are multiples of 4,
3471  // so we need to align to at least 4 bytes when we don't have 32-bit ADR.
3472  Align CPAlign = CP->getAlign();
3473  if (Subtarget->isThumb1Only())
3474  CPAlign = std::max(CPAlign, Align(4));
3475  if (CP->isMachineConstantPoolEntry())
3476  Res =
3477  DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);
3478  else
3479  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);
3480  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3481 }
3482 
3485 }
3486 
3487 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3488  SelectionDAG &DAG) const {
3489  MachineFunction &MF = DAG.getMachineFunction();
3491  unsigned ARMPCLabelIndex = 0;
3492  SDLoc DL(Op);
3493  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3494  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3495  SDValue CPAddr;
3496  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3497  if (!IsPositionIndependent) {
3498  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3499  } else {
3500  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3501  ARMPCLabelIndex = AFI->createPICLabelUId();
3502  ARMConstantPoolValue *CPV =
3503  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3504  ARMCP::CPBlockAddress, PCAdj);
3505  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3506  }
3507  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3508  SDValue Result = DAG.getLoad(
3509  PtrVT, DL, DAG.getEntryNode(), CPAddr,
3511  if (!IsPositionIndependent)
3512  return Result;
3513  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3514  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3515 }
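
Block addresses usually reach this hook from GNU C computed gotos, where every &&label becomes a blockaddress() constant in the IR. A small sketch (GNU extension, accepted by Clang and GCC):

// Each &&label below is lowered through LowerBlockAddress, either as a plain
// constant-pool entry or, for PIC/ROPI, via the PIC_ADD sequence above.
int select_path(int I) {
  static void *Targets[] = {&&Even, &&Odd};
  goto *Targets[I & 1];
Even:
  return 0;
Odd:
  return 1;
}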
3516 
3517 /// Convert a TLS address reference into the correct sequence of loads
3518 /// and calls to compute the variable's address for Darwin, and return an
3519 /// SDValue containing the final node.
3520 
3521 /// Darwin only has one TLS scheme which must be capable of dealing with the
3522 /// fully general situation, in the worst case. This means:
3523 /// + "extern __thread" declaration.
3524 /// + Defined in a possibly unknown dynamic library.
3525 ///
3526 /// The general system is that each __thread variable has a [3 x i32] descriptor
3527 /// which contains information used by the runtime to calculate the address. The
3528 /// only part of this the compiler needs to know about is the first word, which
3529 /// contains a function pointer that must be called with the address of the
3530 /// entire descriptor in "r0".
3531 ///
3532 /// Since this descriptor may be in a different unit, in general access must
3533 /// proceed along the usual ARM rules. A common sequence to produce is:
3534 ///
3535 /// movw rT1, :lower16:_var$non_lazy_ptr
3536 /// movt rT1, :upper16:_var$non_lazy_ptr
3537 /// ldr r0, [rT1]
3538 /// ldr rT2, [r0]
3539 /// blx rT2
3540 /// [...address now in r0...]
3541 SDValue
3542 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3543  SelectionDAG &DAG) const {
3544  assert(Subtarget->isTargetDarwin() &&
3545  "This function expects a Darwin target");
3546  SDLoc DL(Op);
3547 
3548  // The first step is to get the address of the actual global symbol. This is where
3549  // the TLS descriptor lives.
3550  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3551 
3552  // The first entry in the descriptor is a function pointer that we must call
3553  // to obtain the address of the variable.
3554  SDValue Chain = DAG.getEntryNode();
3555  SDValue FuncTLVGet = DAG.getLoad(
3556  MVT::i32, DL, Chain, DescAddr,
3557  MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4),
3558  MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
3559  MachineMemOperand::MOInvariant);
3560  Chain = FuncTLVGet.getValue(1);
3561 
3562  MachineFunction &F = DAG.getMachineFunction();
3563  MachineFrameInfo &MFI = F.getFrameInfo();
3564  MFI.setAdjustsStack(true);
3565 
3566  // TLS calls preserve all registers except those that absolutely must be
3567  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3568  // silly).
3569  auto TRI =
3570  getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3571  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3572  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3573 
3574  // Finally, we can make the call. This is just a degenerate version of a
3575  // normal ARM call node: r0 takes the address of the descriptor, and
3576  // returns the address of the variable in this thread.
3577  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3578  Chain =
3579  DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3580  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3581  DAG.getRegisterMask(Mask), Chain.getValue(1));
3582  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3583 }
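// Illustrative note (sketch, not from this file; the field name "thunk" is
// purely illustrative): conceptually the lowering above performs
//   descriptor = &var$tlv;                        // LowerGlobalAddressDarwin
//   addr       = descriptor->thunk(descriptor);   // indirect call, r0 in/out
// with a register mask that only clobbers r0, lr and cpsr, so surrounding
// code rarely needs extra spills around the call.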
3584 
3585 SDValue
3586 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3587  SelectionDAG &DAG) const {
3588  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3589 
3590  SDValue Chain = DAG.getEntryNode();
3591  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3592  SDLoc DL(Op);
3593 
3594  // Load the current TEB (thread environment block)
3595  SDValue Ops[] = {Chain,
3596  DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3597  DAG.getTargetConstant(15, DL, MVT::i32),
3598  DAG.getTargetConstant(0, DL, MVT::i32),
3599  DAG.getTargetConstant(13, DL, MVT::i32),
3600  DAG.getTargetConstant(0, DL, MVT::i32),
3601  DAG.getTargetConstant(2, DL, MVT::i32)};
3602  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3603  DAG.getVTList(MVT::i32, MVT::Other), Ops);
3604 
3605  SDValue TEB = CurrentTEB.getValue(0);
3606  Chain = CurrentTEB.getValue(1);
3607 
3608  // Load the ThreadLocalStoragePointer from the TEB
3609  // A pointer to the TLS array is located at offset 0x2c from the TEB.
3610  SDValue TLSArray =
3611  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3612  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3613 
3614  // The pointer to the thread's TLS data area is stored in the TLS array at
3615  // an offset of the TLS index scaled by 4.
3616 
3617  // Load the TLS index from the C runtime
3618  SDValue TLSIndex =
3619  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3620  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3621  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3622 
3623  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3624  DAG.getConstant(2, DL, MVT::i32));
3625  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3626  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3627  MachinePointerInfo());
3628 
3629  // Get the offset of the start of the .tls section (section base)
3630  const auto *GA = cast<GlobalAddressSDNode>(Op);
3631  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3632  SDValue Offset = DAG.getLoad(
3633  PtrVT, DL, Chain,
3634  DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3635  DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3636  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3637 
3638  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3639 }
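// Illustrative sketch (assumed pointer arithmetic, mirroring the loads built
// above):
//   tls_array = *(char ***)(TEB + 0x2c);   // ThreadLocalStoragePointer
//   tls_base  = tls_array[_tls_index];     // this module's TLS block
//   addr      = tls_base + SECREL32(var);  // section-relative offset
// where the TEB itself is read with MRC p15, 0, <Rd>, c13, c0, 2 via the
// arm_mrc intrinsic call above.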
3640 
3641 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
3642 SDValue
3643 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3644  SelectionDAG &DAG) const {
3645  SDLoc dl(GA);
3646  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3647  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3648  MachineFunction &MF = DAG.getMachineFunction();
3649  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3650  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3651  ARMConstantPoolValue *CPV =
3652  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3653  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3654  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3655  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3656  Argument = DAG.getLoad(
3657  PtrVT, dl, DAG.getEntryNode(), Argument,
3658  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3659  SDValue Chain = Argument.getValue(1);
3660 
3661  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3662  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3663 
3664  // call __tls_get_addr.
3665  ArgListTy Args;
3666  ArgListEntry Entry;
3667  Entry.Node = Argument;
3668  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3669  Args.push_back(Entry);
3670 
3671  // FIXME: is there useful debug info available here?
3672  TargetLowering::CallLoweringInfo CLI(DAG);
3673  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3674  CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3675  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3676 
3677  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3678  return CallResult.first;
3679 }
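// Illustrative sketch (assumed example): the general-dynamic access built
// above is roughly equivalent to
//   ldr   r0, .LCPI0_0        @ var(TLSGD) - (.LPC0_0 + PCAdj)
// .LPC0_0:
//   add   r0, pc, r0          @ ARMISD::PIC_ADD
//   bl    __tls_get_addr      @ returns &var in r0
// i.e. the argument passed to __tls_get_addr is the PIC-adjusted address of
// the TLSGD constant pool entry.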
3680 
3681 // Lower ISD::GlobalTLSAddress using the "initial exec" or
3682 // "local exec" model.
3683 SDValue
3684 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3685  SelectionDAG &DAG,
3686  TLSModel::Model model) const {
3687  const GlobalValue *GV = GA->getGlobal();
3688  SDLoc dl(GA);
3689  SDValue Offset;
3690  SDValue Chain = DAG.getEntryNode();
3691  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3692  // Get the Thread Pointer
3693  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3694 
3695  if (model == TLSModel::InitialExec) {
3696  MachineFunction &MF = DAG.getMachineFunction();
3697  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3698  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3699  // Initial exec model.
3700  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3701  ARMConstantPoolValue *CPV =
3702  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3703  ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3704  true);
3705  Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3706  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3707  Offset = DAG.getLoad(
3708  PtrVT, dl, Chain, Offset,
3709  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3710  Chain = Offset.getValue(1);
3711 
3712  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3713  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3714 
3715  Offset = DAG.getLoad(
3716  PtrVT, dl, Chain, Offset,
3717  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3718  } else {
3719  // local exec model
3720  assert(model == TLSModel::LocalExec);
3721  ARMConstantPoolValue *CPV =
3722  ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3723  Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3724  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3725  Offset = DAG.getLoad(
3726  PtrVT, dl, Chain, Offset,
3727  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3728  }
3729 
3730  // The address of the thread local variable is the add of the thread
3731  // pointer with the offset of the variable.
3732  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3733 }
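// Illustrative sketch (assumed example): both exec models above reduce to
//   &var = ThreadPointer + Offset
// For local-exec the tpoff offset is loaded straight from the constant pool;
// initial-exec adds one more load through the gottpoff slot. A typical
// local-exec sequence (assuming the thread pointer is read from TPIDRURO):
//   mrc p15, 0, r1, c13, c0, 3   @ thread pointer
//   ldr r0, .LCPI0_0             @ var(TPOFF)
//   add r0, r1, r0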
3734 
3735 SDValue
3736 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3737  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3738  if (DAG.getTarget().useEmulatedTLS())
3739  return LowerToTLSEmulatedModel(GA, DAG);
3740 
3741  if (Subtarget->isTargetDarwin())
3742  return LowerGlobalTLSAddressDarwin(Op, DAG);
3743 
3744  if (Subtarget->isTargetWindows())
3745  return LowerGlobalTLSAddressWindows(Op, DAG);
3746 
3747  // TODO: implement the "local dynamic" model
3748  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3749  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3750 
3751  switch (model) {
3752  case TLSModel::GeneralDynamic:
3753  case TLSModel::LocalDynamic:
3754  return LowerToTLSGeneralDynamicModel(GA, DAG);
3755  case TLSModel::InitialExec:
3756  case TLSModel::LocalExec:
3757  return LowerToTLSExecModels(GA, DAG, model);
3758  }
3759  llvm_unreachable("bogus TLS model");
3760 }
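// Note (summary, not from this file): the model dispatched on here comes from
// TargetMachine::getTLSModel, which honours -ftls-model= and the per-variable
// __attribute__((tls_model("..."))), so a variable can be forced into
// initial-exec or local-exec even when compiling with -fPIC.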
3761 
3762 /// Return true if all users of V are within function F, looking through
3763 /// ConstantExprs.
3764 static bool allUsersAreInFunction(const Value *V, const Function *F) {
3765  SmallVector<const User*,4> Worklist(V->users());
3766  while (!Worklist.empty()) {
3767  auto *U = Worklist.pop_back_val();
3768  if (isa<ConstantExpr>(U)) {
3769  append_range(Worklist, U->users());
3770  continue;
3771  }
3772 
3773  auto *I = dyn_cast<Instruction>(U);
3774  if (!I || I->getParent()->getParent() != F)
3775  return false;
3776  }
3777  return true;
3778 }
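// Illustrative note: a global whose only uses are instructions inside F,
// possibly reached through ConstantExpr casts/GEPs (which the worklist above
// looks through), satisfies this predicate; a single use from another
// function, or a user that is neither an Instruction nor a ConstantExpr
// (e.g. another global's initializer), makes it return false.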
3779 
3780 static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3781  const GlobalValue *GV, SelectionDAG &DAG,
3782  EVT PtrVT, const SDLoc &dl) {
3783  // If we're creating a pool entry for a constant global with unnamed address,
3784  // and the global is small enough, we can emit it inline into the constant pool
3785  // to save ourselves an indirection.
3786  //
3787  // This is a win if the constant is only used in one function (so it doesn't
3788  // need to be duplicated) or duplicating the constant wouldn't increase code
3789  // size (implying the constant is no larger than 4 bytes).
3790  const Function &F = DAG.getMachineFunction().getFunction();
3791 
3792  // We rely on this decision to inline being idempotent and unrelated to the
3793  // use-site. We know that if we inline a variable at one use site, we'll
3794  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel