LLVM  9.0.0svn
ARMISelLowering.cpp
1 //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that ARM uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARMISelLowering.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMBaseRegisterInfo.h"
17 #include "ARMCallingConv.h"
18 #include "ARMConstantPoolValue.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMPerfectShuffle.h"
21 #include "ARMRegisterInfo.h"
22 #include "ARMSelectionDAGInfo.h"
23 #include "ARMSubtarget.h"
26 #include "Utils/ARMBaseInfo.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/BitVector.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/STLExtras.h"
33 #include "llvm/ADT/SmallPtrSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/Statistic.h"
36 #include "llvm/ADT/StringExtras.h"
37 #include "llvm/ADT/StringRef.h"
38 #include "llvm/ADT/StringSwitch.h"
39 #include "llvm/ADT/Triple.h"
40 #include "llvm/ADT/Twine.h"
64 #include "llvm/IR/Attributes.h"
65 #include "llvm/IR/CallingConv.h"
66 #include "llvm/IR/Constant.h"
67 #include "llvm/IR/Constants.h"
68 #include "llvm/IR/DataLayout.h"
69 #include "llvm/IR/DebugLoc.h"
70 #include "llvm/IR/DerivedTypes.h"
71 #include "llvm/IR/Function.h"
72 #include "llvm/IR/GlobalAlias.h"
73 #include "llvm/IR/GlobalValue.h"
74 #include "llvm/IR/GlobalVariable.h"
75 #include "llvm/IR/IRBuilder.h"
76 #include "llvm/IR/InlineAsm.h"
77 #include "llvm/IR/Instruction.h"
78 #include "llvm/IR/Instructions.h"
79 #include "llvm/IR/IntrinsicInst.h"
80 #include "llvm/IR/Intrinsics.h"
81 #include "llvm/IR/Module.h"
82 #include "llvm/IR/PatternMatch.h"
83 #include "llvm/IR/Type.h"
84 #include "llvm/IR/User.h"
85 #include "llvm/IR/Value.h"
86 #include "llvm/MC/MCInstrDesc.h"
88 #include "llvm/MC/MCRegisterInfo.h"
89 #include "llvm/MC/MCSchedule.h"
92 #include "llvm/Support/Casting.h"
93 #include "llvm/Support/CodeGen.h"
95 #include "llvm/Support/Compiler.h"
96 #include "llvm/Support/Debug.h"
98 #include "llvm/Support/KnownBits.h"
100 #include "llvm/Support/MathExtras.h"
104 #include <algorithm>
105 #include <cassert>
106 #include <cstdint>
107 #include <cstdlib>
108 #include <iterator>
109 #include <limits>
110 #include <string>
111 #include <tuple>
112 #include <utility>
113 #include <vector>
114 
115 using namespace llvm;
116 using namespace llvm::PatternMatch;
117 
118 #define DEBUG_TYPE "arm-isel"
119 
120 STATISTIC(NumTailCalls, "Number of tail calls");
121 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
122 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
123 STATISTIC(NumConstpoolPromoted,
124  "Number of constants with their storage promoted into constant pools");
125 
126 static cl::opt<bool>
127 ARMInterworking("arm-interworking", cl::Hidden,
128  cl::desc("Enable / disable ARM interworking (for debugging only)"),
129  cl::init(true));
130 
131 static cl::opt<bool> EnableConstpoolPromotion(
132  "arm-promote-constant", cl::Hidden,
133  cl::desc("Enable / disable promotion of unnamed_addr constants into "
134  "constant pools"),
135  cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
136 static cl::opt<unsigned> ConstpoolPromotionMaxSize(
137  "arm-promote-constant-max-size", cl::Hidden,
138  cl::desc("Maximum size of constant to promote into a constant pool"),
139  cl::init(64));
140 static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
141  "arm-promote-constant-max-total", cl::Hidden,
142  cl::desc("Maximum size of ALL constants to promote into a constant pool"),
143  cl::init(128));
144 
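// Illustrative usage (not part of the original file): the options above are
// backend cl::opt flags, so they can be toggled on an llc command line, e.g.
//   llc -mtriple=armv7a-none-eabi -arm-promote-constant \
//       -arm-promote-constant-max-size=32 input.ll
// The triple and input file name here are placeholders for illustration only.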
145 // The APCS parameter registers.
146 static const MCPhysReg GPRArgRegs[] = {
147  ARM::R0, ARM::R1, ARM::R2, ARM::R3
148 };
149 
150 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
151  MVT PromotedBitwiseVT) {
152  if (VT != PromotedLdStVT) {
153  setOperationAction(ISD::LOAD, VT, Promote);
154  AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
155 
156  setOperationAction(ISD::STORE, VT, Promote);
157  AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
158  }
159 
160  MVT ElemTy = VT.getVectorElementType();
161  if (ElemTy != MVT::f64)
162  setOperationAction(ISD::SETCC, VT, Custom);
163  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
164  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
165  if (ElemTy == MVT::i32) {
166  setOperationAction(ISD::SINT_TO_FP, VT, Custom);
167  setOperationAction(ISD::UINT_TO_FP, VT, Custom);
168  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
169  setOperationAction(ISD::FP_TO_UINT, VT, Custom);
170  } else {
171  setOperationAction(ISD::SINT_TO_FP, VT, Expand);
172  setOperationAction(ISD::UINT_TO_FP, VT, Expand);
173  setOperationAction(ISD::FP_TO_SINT, VT, Expand);
174  setOperationAction(ISD::FP_TO_UINT, VT, Expand);
175  }
176  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
177  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
178  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
179  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
180  setOperationAction(ISD::SELECT, VT, Expand);
181  setOperationAction(ISD::SELECT_CC, VT, Expand);
182  setOperationAction(ISD::VSELECT, VT, Expand);
183  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
184  if (VT.isInteger()) {
185  setOperationAction(ISD::SHL, VT, Custom);
186  setOperationAction(ISD::SRA, VT, Custom);
187  setOperationAction(ISD::SRL, VT, Custom);
188  }
189 
190  // Promote all bit-wise operations.
191  if (VT.isInteger() && VT != PromotedBitwiseVT) {
192  setOperationAction(ISD::AND, VT, Promote);
193  AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
194  setOperationAction(ISD::OR, VT, Promote);
195  AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
196  setOperationAction(ISD::XOR, VT, Promote);
197  AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
198  }
199 
200  // Neon does not support vector divide/remainder operations.
201  setOperationAction(ISD::SDIV, VT, Expand);
202  setOperationAction(ISD::UDIV, VT, Expand);
203  setOperationAction(ISD::FDIV, VT, Expand);
204  setOperationAction(ISD::SREM, VT, Expand);
205  setOperationAction(ISD::UREM, VT, Expand);
206  setOperationAction(ISD::FREM, VT, Expand);
207 
208  if (!VT.isFloatingPoint() &&
209  VT != MVT::v2i64 && VT != MVT::v1i64)
210  for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
211  setOperationAction(Opcode, VT, Legal);
212 }
213 
214 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
215  addRegisterClass(VT, &ARM::DPRRegClass);
216  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
217 }
218 
219 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
220  addRegisterClass(VT, &ARM::DPairRegClass);
221  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
222 }
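// Illustrative note (not part of the original file): as a concrete example of
// the helpers above, addDRTypeForNEON(MVT::v2f32) registers v2f32 in the
// 64-bit DPR class and, through addTypeForNEON, promotes its loads/stores to
// f64, while addQRTypeForNEON(MVT::v16i8) registers v16i8 in the 128-bit
// DPair class and promotes its loads/stores to v2f64 and its AND/OR/XOR to
// v4i32.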
223 
224 ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
225  const ARMSubtarget &STI)
226  : TargetLowering(TM), Subtarget(&STI) {
227  RegInfo = Subtarget->getRegisterInfo();
228  Itins = Subtarget->getInstrItineraryData();
229 
232 
233  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
234  !Subtarget->isTargetWatchOS()) {
235  bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
236  for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
237  setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
238  IsHFTarget ? CallingConv::ARM_AAPCS_VFP
239  : CallingConv::ARM_AAPCS);
240  }
241 
242  if (Subtarget->isTargetMachO()) {
243  // Uses VFP for Thumb libfuncs if available.
244  if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
245  Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
246  static const struct {
247  const RTLIB::Libcall Op;
248  const char * const Name;
249  const ISD::CondCode Cond;
250  } LibraryCalls[] = {
251  // Single-precision floating-point arithmetic.
252  { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
253  { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
254  { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
255  { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
256 
257  // Double-precision floating-point arithmetic.
258  { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
259  { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
260  { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
261  { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
262 
263  // Single-precision comparisons.
264  { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
265  { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
266  { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
267  { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
268  { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
269  { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
270  { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
271  { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
272 
273  // Double-precision comparisons.
274  { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
275  { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
276  { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
277  { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
278  { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
279  { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
280  { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
281  { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
282 
283  // Floating-point to integer conversions.
284  // i64 conversions are done via library routines even when generating VFP
285  // instructions, so use the same ones.
286  { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
287  { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
288  { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
289  { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
290 
291  // Conversions between floating types.
292  { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
293  { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
294 
295  // Integer to floating-point conversions.
296  // i64 conversions are done via library routines even when generating VFP
297  // instructions, so use the same ones.
298  // FIXME: There appears to be some naming inconsistency in ARM libgcc:
299  // e.g., __floatunsidf vs. __floatunssidfvfp.
300  { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
301  { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
302  { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
303  { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
304  };
305 
306  for (const auto &LC : LibraryCalls) {
307  setLibcallName(LC.Op, LC.Name);
308  if (LC.Cond != ISD::SETCC_INVALID)
309  setCmpLibcallCC(LC.Op, LC.Cond);
310  }
311  }
312  }
313 
314  // These libcalls are not available in 32-bit.
315  setLibcallName(RTLIB::SHL_I128, nullptr);
316  setLibcallName(RTLIB::SRL_I128, nullptr);
317  setLibcallName(RTLIB::SRA_I128, nullptr);
318 
319  // RTLIB
320  if (Subtarget->isAAPCS_ABI() &&
321  (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
322  Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
323  static const struct {
324  const RTLIB::Libcall Op;
325  const char * const Name;
326  const CallingConv::ID CC;
327  const ISD::CondCode Cond;
328  } LibraryCalls[] = {
329  // Double-precision floating-point arithmetic helper functions
330  // RTABI chapter 4.1.2, Table 2
331  { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
332  { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
333  { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
334  { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
335 
336  // Double-precision floating-point comparison helper functions
337  // RTABI chapter 4.1.2, Table 3
338  { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
339  { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
340  { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
341  { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
342  { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
343  { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
344  { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
345  { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
346 
347  // Single-precision floating-point arithmetic helper functions
348  // RTABI chapter 4.1.2, Table 4
349  { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
350  { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
351  { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
352  { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
353 
354  // Single-precision floating-point comparison helper functions
355  // RTABI chapter 4.1.2, Table 5
356  { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
357  { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
358  { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
359  { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
360  { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
361  { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
362  { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
363  { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
364 
365  // Floating-point to integer conversions.
366  // RTABI chapter 4.1.2, Table 6
367  { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
368  { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
369  { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
370  { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
371  { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
372  { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
373  { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
374  { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
375 
376  // Conversions between floating types.
377  // RTABI chapter 4.1.2, Table 7
378  { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
379  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
380  { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
381 
382  // Integer to floating-point conversions.
383  // RTABI chapter 4.1.2, Table 8
384  { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
385  { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
386  { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
387  { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
388  { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
389  { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
390  { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
391  { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
392 
393  // Long long helper functions
394  // RTABI chapter 4.2, Table 9
395  { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
396  { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
397  { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
398  { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
399 
400  // Integer division functions
401  // RTABI chapter 4.3.1
402  { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
403  { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
404  { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
405  { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
406  { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
407  { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
408  { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
409  { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
410  };
411 
412  for (const auto &LC : LibraryCalls) {
413  setLibcallName(LC.Op, LC.Name);
414  setLibcallCallingConv(LC.Op, LC.CC);
415  if (LC.Cond != ISD::SETCC_INVALID)
416  setCmpLibcallCC(LC.Op, LC.Cond);
417  }
418 
419  // EABI dependent RTLIB
420  if (TM.Options.EABIVersion == EABI::EABI4 ||
421  TM.Options.EABIVersion == EABI::EABI5) {
422  static const struct {
423  const RTLIB::Libcall Op;
424  const char *const Name;
425  const CallingConv::ID CC;
426  const ISD::CondCode Cond;
427  } MemOpsLibraryCalls[] = {
428  // Memory operations
429  // RTABI chapter 4.3.4
430  { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
431  { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
432  { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
433  };
434 
435  for (const auto &LC : MemOpsLibraryCalls) {
436  setLibcallName(LC.Op, LC.Name);
437  setLibcallCallingConv(LC.Op, LC.CC);
438  if (LC.Cond != ISD::SETCC_INVALID)
439  setCmpLibcallCC(LC.Op, LC.Cond);
440  }
441  }
442  }
443 
444  if (Subtarget->isTargetWindows()) {
445  static const struct {
446  const RTLIB::Libcall Op;
447  const char * const Name;
448  const CallingConv::ID CC;
449  } LibraryCalls[] = {
450  { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
451  { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
452  { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
453  { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
454  { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
455  { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
456  { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
457  { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
458  };
459 
460  for (const auto &LC : LibraryCalls) {
461  setLibcallName(LC.Op, LC.Name);
462  setLibcallCallingConv(LC.Op, LC.CC);
463  }
464  }
465 
466  // Use divmod compiler-rt calls for iOS 5.0 and later.
467  if (Subtarget->isTargetMachO() &&
468  !(Subtarget->isTargetIOS() &&
469  Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
470  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
471  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
472  }
473 
474  // The half <-> float conversion functions are always soft-float on
475  // non-watchos platforms, but are needed for some targets which use a
476  // hard-float calling convention by default.
477  if (!Subtarget->isTargetWatchABI()) {
478  if (Subtarget->isAAPCS_ABI()) {
479  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
480  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
481  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
482  } else {
483  setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
484  setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
485  setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
486  }
487  }
488 
489  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
490  // a __gnu_ prefix (which is the default).
491  if (Subtarget->isTargetAEABI()) {
492  static const struct {
493  const RTLIB::Libcall Op;
494  const char * const Name;
495  const CallingConv::ID CC;
496  } LibraryCalls[] = {
497  { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
498  { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
499  { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
500  };
501 
502  for (const auto &LC : LibraryCalls) {
503  setLibcallName(LC.Op, LC.Name);
504  setLibcallCallingConv(LC.Op, LC.CC);
505  }
506  }
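// Illustrative note (not part of the original file): with the table above, a
// half-to-float extension lowered through RTLIB::FPEXT_F16_F32 calls
// "__aeabi_h2f" on an AEABI target instead of the generic default (usually
// the "__gnu_h2f_ieee" name; that default is stated here from memory, not
// from this file).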
507 
508  if (Subtarget->isThumb1Only())
509  addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
510  else
511  addRegisterClass(MVT::i32, &ARM::GPRRegClass);
512 
513  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
514  !Subtarget->isThumb1Only()) {
515  addRegisterClass(MVT::f32, &ARM::SPRRegClass);
516  addRegisterClass(MVT::f64, &ARM::DPRRegClass);
517  }
518 
519  if (Subtarget->hasFullFP16()) {
520  addRegisterClass(MVT::f16, &ARM::HPRRegClass);
524 
527  }
528 
529  for (MVT VT : MVT::vector_valuetypes()) {
530  for (MVT InnerVT : MVT::vector_valuetypes()) {
531  setTruncStoreAction(VT, InnerVT, Expand);
532  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
533  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
534  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
535  }
536 
541 
543  }
544 
547 
550 
551  if (Subtarget->hasNEON()) {
552  addDRTypeForNEON(MVT::v2f32);
553  addDRTypeForNEON(MVT::v8i8);
554  addDRTypeForNEON(MVT::v4i16);
555  addDRTypeForNEON(MVT::v2i32);
556  addDRTypeForNEON(MVT::v1i64);
557 
558  addQRTypeForNEON(MVT::v4f32);
559  addQRTypeForNEON(MVT::v2f64);
560  addQRTypeForNEON(MVT::v16i8);
561  addQRTypeForNEON(MVT::v8i16);
562  addQRTypeForNEON(MVT::v4i32);
563  addQRTypeForNEON(MVT::v2i64);
564 
565  if (Subtarget->hasFullFP16()) {
566  addQRTypeForNEON(MVT::v8f16);
567  addDRTypeForNEON(MVT::v4f16);
568  }
569 
570  // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
571  // neither Neon nor VFP support any arithmetic operations on it.
572  // The same applies to v4f32. But keep in mind that vadd, vsub, vmul are natively
573  // supported for v4f32.
577  // FIXME: Code duplication: FDIV and FREM are expanded always, see
578  // ARMTargetLowering::addTypeForNEON method for details.
581  // FIXME: Create unittest.
582  // In other words, find a case where "copysign" appears in a DAG with vector
583  // operands.
585  // FIXME: Code duplication: SETCC has custom operation action, see
586  // ARMTargetLowering::addTypeForNEON method for details.
588  // FIXME: Create unittest for FNEG and for FABS.
600  // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
607 
622 
623  // Mark v2f32 intrinsics.
638 
639  // Neon does not support some operations on v1i64 and v2i64 types.
641  // Custom handling for some quad-vector types to detect VMULL.
645  // Custom handling for some vector types to avoid expensive expansions
650  // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
651  // a destination type that is wider than the source, nor does
652  // it have a FP_TO_[SU]INT instruction with a narrower destination than
653  // source.
662 
665 
666  // NEON does not have single instruction CTPOP for vectors with element
667  // types wider than 8 bits. However, custom lowering can leverage the
668  // v8i8/v16i8 vcnt instruction.
675 
678 
679  // NEON does not have single instruction CTTZ for vectors.
684 
689 
694 
699 
700  // NEON only has FMA instructions as of VFP4.
701  if (!Subtarget->hasVFP4()) {
704  }
705 
723 
724  // It is legal to extload from v4i8 to v4i16 or v4i32.
726  MVT::v2i32}) {
727  for (MVT VT : MVT::integer_vector_valuetypes()) {
731  }
732  }
733  }
734 
735  if (Subtarget->isFPOnlySP()) {
736  // When targeting a floating-point unit with only single-precision
737  // operations, f64 is legal for the few double-precision instructions which
738  // are present. However, no double-precision operations other than moves,
739  // loads and stores are provided by the hardware.
772  }
773 
775 
776  // ARM does not have floating-point extending loads.
777  for (MVT VT : MVT::fp_valuetypes()) {
780  }
781 
782  // ... or truncating stores
786 
787  // ARM does not have i1 sign extending load.
788  for (MVT VT : MVT::integer_valuetypes())
790 
791  // ARM supports all 4 flavors of integer indexed load / store.
792  if (!Subtarget->isThumb1Only()) {
793  for (unsigned im = (unsigned)ISD::PRE_INC;
803  }
804  } else {
805  // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
808  }
809 
814 
817 
818  // i64 operation support.
821  if (Subtarget->isThumb1Only()) {
824  }
825  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
826  || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
828 
835 
836  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
837  if (Subtarget->isThumb1Only()) {
841  }
842 
843  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
845 
846  // ARM does not have ROTL.
848  for (MVT VT : MVT::vector_valuetypes()) {
851  }
854  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
857  }
858 
859  // @llvm.readcyclecounter requires the Performance Monitors extension.
860  // Default to the 0 expansion on unsupported platforms.
861  // FIXME: Technically there are older ARM CPUs that have
862  // implementation-specific ways of obtaining this information.
863  if (Subtarget->hasPerfMon())
865 
866  // Only ARMv6 has BSWAP.
867  if (!Subtarget->hasV6Ops())
869 
870  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
871  : Subtarget->hasDivideInARMMode();
872  if (!hasDivide) {
873  // These are expanded into libcalls if the cpu doesn't have HW divider.
876  }
877 
878  if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
881 
884  }
885 
888 
889  // Register based DivRem for AEABI (RTABI 4.2)
890  if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
891  Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
892  Subtarget->isTargetWindows()) {
895  HasStandaloneRem = false;
896 
897  if (Subtarget->isTargetWindows()) {
898  const struct {
899  const RTLIB::Libcall Op;
900  const char * const Name;
901  const CallingConv::ID CC;
902  } LibraryCalls[] = {
903  { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
904  { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
905  { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
906  { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
907 
908  { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
909  { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
910  { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
911  { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
912  };
913 
914  for (const auto &LC : LibraryCalls) {
915  setLibcallName(LC.Op, LC.Name);
916  setLibcallCallingConv(LC.Op, LC.CC);
917  }
918  } else {
919  const struct {
920  const RTLIB::Libcall Op;
921  const char * const Name;
922  const CallingConv::ID CC;
923  } LibraryCalls[] = {
924  { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
925  { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
926  { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
927  { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
928 
929  { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
930  { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
931  { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
932  { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
933  };
934 
935  for (const auto &LC : LibraryCalls) {
936  setLibcallName(LC.Op, LC.Name);
937  setLibcallCallingConv(LC.Op, LC.CC);
938  }
939  }
940 
945  } else {
948  }
949 
950  if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
951  for (auto &VT : {MVT::f32, MVT::f64})
953 
958 
961 
962  // Use the default implementation.
969 
970  if (Subtarget->isTargetWindows())
972  else
974 
975  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
976  // the default expansion.
977  InsertFencesForAtomic = false;
978  if (Subtarget->hasAnyDataBarrier() &&
979  (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
980  // ATOMIC_FENCE needs custom lowering; the others should have been expanded
981  // to ldrex/strex loops already.
983  if (!Subtarget->isThumb() || !Subtarget->isMClass())
985 
986  // On v8, we have particularly efficient implementations of atomic fences
987  // if they can be combined with nearby atomic loads and stores.
988  if (!Subtarget->hasAcquireRelease() ||
989  getTargetMachine().getOptLevel() == 0) {
990  // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
991  InsertFencesForAtomic = true;
992  }
993  } else {
994  // If there's anything we can use as a barrier, go through custom lowering
995  // for ATOMIC_FENCE.
996  // If the target has DMB in Thumb mode, fences can be inserted.
997  if (Subtarget->hasDataBarrier())
998  InsertFencesForAtomic = true;
999 
1000  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
1001  Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1002 
1003  // Set them all for expansion, which will force libcalls.
1016  // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1017  // Unordered/Monotonic case.
1018  if (!InsertFencesForAtomic) {
1021  }
1022  }
1023 
1025 
1026  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1027  if (!Subtarget->hasV6Ops()) {
1030  }
1032 
1033  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1034  !Subtarget->isThumb1Only()) {
1035  // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1036  // iff target supports vfp2.
1039  }
1040 
1041  // We want to custom lower some of our intrinsics.
1046  if (Subtarget->useSjLjEH())
1047  setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1048 
1058  if (Subtarget->hasFullFP16()) {
1062  }
1063 
1065 
1068  if (Subtarget->hasFullFP16())
1073 
1074  // We don't support sin/cos/fmod/copysign/pow
1083  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1084  !Subtarget->isThumb1Only()) {
1087  }
1090 
1091  if (!Subtarget->hasVFP4()) {
1094  }
1095 
1096  // Various VFP goodness
1097  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1098  // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1099  if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
1102  }
1103 
1104  // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1105  if (!Subtarget->hasFP16()) {
1108  }
1109  }
1110 
1111  // Use __sincos_stret if available.
1112  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1113  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1116  }
1117 
1118  // FP-ARMv8 implements a lot of rounding-like FP operations.
1119  if (Subtarget->hasFPARMv8()) {
1132 
1133  if (!Subtarget->isFPOnlySP()) {
1142  }
1143  }
1144 
1145  if (Subtarget->hasNEON()) {
1146  // vmin and vmax aren't available in a scalar form, so we use
1147  // a NEON instruction with an undef lane instead.
1156 
1157  if (Subtarget->hasFullFP16()) {
1162 
1167  }
1168  }
1169 
1170  // We have target-specific dag combine patterns for the following nodes:
1171  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1178 
1179  if (Subtarget->hasV6Ops())
1181  if (Subtarget->isThumb1Only())
1183 
1185 
1186  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1187  !Subtarget->hasVFP2())
1189  else
1191 
1192  //// temporary - rewrite interface to use type
1193  MaxStoresPerMemset = 8;
1195  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1197  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1199 
1200  // On ARM arguments smaller than 4 bytes are extended, so all arguments
1201  // are at least 4 bytes aligned.
1203 
1204  // Prefer likely predicted branches to selects on out-of-order cores.
1205  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1206 
1208 
1209  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
1210 
1211  if (Subtarget->isThumb() || Subtarget->isThumb2())
1213 }
1214 
1215 bool ARMTargetLowering::useSoftFloat() const {
1216  return Subtarget->useSoftFloat();
1217 }
1218 
1219 // FIXME: It might make sense to define the representative register class as the
1220 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1221 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1222 // SPR's representative would be DPR_VFP2. This should work well if register
1223 // pressure tracking were modified such that a register use would increment the
1224 // pressure of the register class's representative and all of its super
1225 // classes' representatives transitively. We have not implemented this because
1226 // of the difficulty prior to coalescing of modeling operand register classes
1227 // due to the common occurrence of cross class copies and subregister insertions
1228 // and extractions.
1229 std::pair<const TargetRegisterClass *, uint8_t>
1230 ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1231  MVT VT) const {
1232  const TargetRegisterClass *RRC = nullptr;
1233  uint8_t Cost = 1;
1234  switch (VT.SimpleTy) {
1235  default:
1236  return TargetLowering::findRepresentativeClass(TRI, VT);
1237  // Use DPR as representative register class for all floating point
1238  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1239  // the cost is 1 for both f32 and f64.
1240  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1241  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1242  RRC = &ARM::DPRRegClass;
1243  // When NEON is used for SP, only half of the register file is available
1244  // because operations that define both SP and DP results will be constrained
1245  // to the VFP2 class (D0-D15). We currently model this constraint prior to
1246  // coalescing by double-counting the SP regs. See the FIXME above.
1247  if (Subtarget->useNEONForSinglePrecisionFP())
1248  Cost = 2;
1249  break;
1250  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1251  case MVT::v4f32: case MVT::v2f64:
1252  RRC = &ARM::DPRRegClass;
1253  Cost = 2;
1254  break;
1255  case MVT::v4i64:
1256  RRC = &ARM::DPRRegClass;
1257  Cost = 4;
1258  break;
1259  case MVT::v8i64:
1260  RRC = &ARM::DPRRegClass;
1261  Cost = 8;
1262  break;
1263  }
1264  return std::make_pair(RRC, Cost);
1265 }
1266 
1267 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1268  switch ((ARMISD::NodeType)Opcode) {
1269  case ARMISD::FIRST_NUMBER: break;
1270  case ARMISD::Wrapper: return "ARMISD::Wrapper";
1271  case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1272  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1273  case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1274  case ARMISD::CALL: return "ARMISD::CALL";
1275  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1276  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1277  case ARMISD::BRCOND: return "ARMISD::BRCOND";
1278  case ARMISD::BR_JT: return "ARMISD::BR_JT";
1279  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1280  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1281  case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1282  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1283  case ARMISD::CMP: return "ARMISD::CMP";
1284  case ARMISD::CMN: return "ARMISD::CMN";
1285  case ARMISD::CMPZ: return "ARMISD::CMPZ";
1286  case ARMISD::CMPFP: return "ARMISD::CMPFP";
1287  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1288  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1289  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1290 
1291  case ARMISD::CMOV: return "ARMISD::CMOV";
1292  case ARMISD::SUBS: return "ARMISD::SUBS";
1293 
1294  case ARMISD::SSAT: return "ARMISD::SSAT";
1295  case ARMISD::USAT: return "ARMISD::USAT";
1296 
1297  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1298  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1299  case ARMISD::RRX: return "ARMISD::RRX";
1300 
1301  case ARMISD::ADDC: return "ARMISD::ADDC";
1302  case ARMISD::ADDE: return "ARMISD::ADDE";
1303  case ARMISD::SUBC: return "ARMISD::SUBC";
1304  case ARMISD::SUBE: return "ARMISD::SUBE";
1305 
1306  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1307  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1308  case ARMISD::VMOVhr: return "ARMISD::VMOVhr";
1309  case ARMISD::VMOVrh: return "ARMISD::VMOVrh";
1310  case ARMISD::VMOVSR: return "ARMISD::VMOVSR";
1311 
1312  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1313  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1314  case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1315 
1316  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1317 
1318  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1319 
1320  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1321 
1322  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1323 
1324  case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1325 
1326  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1327  case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1328 
1329  case ARMISD::VCEQ: return "ARMISD::VCEQ";
1330  case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
1331  case ARMISD::VCGE: return "ARMISD::VCGE";
1332  case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
1333  case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
1334  case ARMISD::VCGEU: return "ARMISD::VCGEU";
1335  case ARMISD::VCGT: return "ARMISD::VCGT";
1336  case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
1337  case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
1338  case ARMISD::VCGTU: return "ARMISD::VCGTU";
1339  case ARMISD::VTST: return "ARMISD::VTST";
1340 
1341  case ARMISD::VSHL: return "ARMISD::VSHL";
1342  case ARMISD::VSHRs: return "ARMISD::VSHRs";
1343  case ARMISD::VSHRu: return "ARMISD::VSHRu";
1344  case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
1345  case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
1346  case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
1347  case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
1348  case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
1349  case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
1350  case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
1351  case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
1352  case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
1353  case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
1354  case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
1355  case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
1356  case ARMISD::VSLI: return "ARMISD::VSLI";
1357  case ARMISD::VSRI: return "ARMISD::VSRI";
1358  case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1359  case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1360  case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1361  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1362  case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1363  case ARMISD::VDUP: return "ARMISD::VDUP";
1364  case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1365  case ARMISD::VEXT: return "ARMISD::VEXT";
1366  case ARMISD::VREV64: return "ARMISD::VREV64";
1367  case ARMISD::VREV32: return "ARMISD::VREV32";
1368  case ARMISD::VREV16: return "ARMISD::VREV16";
1369  case ARMISD::VZIP: return "ARMISD::VZIP";
1370  case ARMISD::VUZP: return "ARMISD::VUZP";
1371  case ARMISD::VTRN: return "ARMISD::VTRN";
1372  case ARMISD::VTBL1: return "ARMISD::VTBL1";
1373  case ARMISD::VTBL2: return "ARMISD::VTBL2";
1374  case ARMISD::VMULLs: return "ARMISD::VMULLs";
1375  case ARMISD::VMULLu: return "ARMISD::VMULLu";
1376  case ARMISD::UMAAL: return "ARMISD::UMAAL";
1377  case ARMISD::UMLAL: return "ARMISD::UMLAL";
1378  case ARMISD::SMLAL: return "ARMISD::SMLAL";
1379  case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1380  case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1381  case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1382  case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1383  case ARMISD::SMULWB: return "ARMISD::SMULWB";
1384  case ARMISD::SMULWT: return "ARMISD::SMULWT";
1385  case ARMISD::SMLALD: return "ARMISD::SMLALD";
1386  case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1387  case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1388  case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1389  case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
1390  case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
1391  case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1392  case ARMISD::BFI: return "ARMISD::BFI";
1393  case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1394  case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1395  case ARMISD::VBSL: return "ARMISD::VBSL";
1396  case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1397  case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1398  case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1399  case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1400  case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1401  case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1402  case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1403  case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1404  case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1405  case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1406  case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1407  case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1408  case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1409  case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1410  case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1411  case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1412  case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1413  case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1414  case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1415  case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1416  case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1417  case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1418  case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1419  }
1420  return nullptr;
1421 }
1422 
1423 EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1424  EVT VT) const {
1425  if (!VT.isVector())
1426  return getPointerTy(DL);
1427  return VT.changeVectorElementTypeToInteger();
1428 }
1429 
1430 /// getRegClassFor - Return the register class that should be used for the
1431 /// specified value type.
1432 const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
1433  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1434  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1435  // load / store 4 to 8 consecutive D registers.
1436  if (Subtarget->hasNEON()) {
1437  if (VT == MVT::v4i64)
1438  return &ARM::QQPRRegClass;
1439  if (VT == MVT::v8i64)
1440  return &ARM::QQQQPRRegClass;
1441  }
1442  return TargetLowering::getRegClassFor(VT);
1443 }
1444 
1445 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1446 // source/dest is aligned and the copy size is large enough. We therefore want
1447 // to align such objects passed to memory intrinsics.
1448 bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1449  unsigned &PrefAlign) const {
1450  if (!isa<MemIntrinsic>(CI))
1451  return false;
1452  MinSize = 8;
1453  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1454  // cycle faster than 4-byte aligned LDM.
1455  PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1456  return true;
1457 }
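// Worked example (not part of the original file): for a memcpy intrinsic on an
// ARMv6+ A-class core the hook above reports MinSize = 8 and PrefAlign = 8,
// i.e. copies of at least 8 bytes prefer 8-byte aligned pointer arguments so
// the expansion can use LDM/STM efficiently; on M-class or pre-v6 cores the
// preferred alignment stays at 4.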
1458 
1459 // Create a fast isel object.
1460 FastISel *
1461 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1462  const TargetLibraryInfo *libInfo) const {
1463  return ARM::createFastISel(funcInfo, libInfo);
1464 }
1465 
1466 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1467  unsigned NumVals = N->getNumValues();
1468  if (!NumVals)
1469  return Sched::RegPressure;
1470 
1471  for (unsigned i = 0; i != NumVals; ++i) {
1472  EVT VT = N->getValueType(i);
1473  if (VT == MVT::Glue || VT == MVT::Other)
1474  continue;
1475  if (VT.isFloatingPoint() || VT.isVector())
1476  return Sched::ILP;
1477  }
1478 
1479  if (!N->isMachineOpcode())
1480  return Sched::RegPressure;
1481 
1482  // Loads are scheduled for latency even if the instruction itinerary
1483  // is not available.
1484  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1485  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1486 
1487  if (MCID.getNumDefs() == 0)
1488  return Sched::RegPressure;
1489  if (!Itins->isEmpty() &&
1490  Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1491  return Sched::ILP;
1492 
1493  return Sched::RegPressure;
1494 }
1495 
1496 //===----------------------------------------------------------------------===//
1497 // Lowering Code
1498 //===----------------------------------------------------------------------===//
1499 
1500 static bool isSRL16(const SDValue &Op) {
1501  if (Op.getOpcode() != ISD::SRL)
1502  return false;
1503  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1504  return Const->getZExtValue() == 16;
1505  return false;
1506 }
1507 
1508 static bool isSRA16(const SDValue &Op) {
1509  if (Op.getOpcode() != ISD::SRA)
1510  return false;
1511  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1512  return Const->getZExtValue() == 16;
1513  return false;
1514 }
1515 
1516 static bool isSHL16(const SDValue &Op) {
1517  if (Op.getOpcode() != ISD::SHL)
1518  return false;
1519  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1520  return Const->getZExtValue() == 16;
1521  return false;
1522 }
1523 
1524 // Check for a signed 16-bit value. We special case SRA because it makes it
1525 // simpler when also looking for SRAs that aren't sign extending a
1526 // smaller value. Without the check, we'd need to take extra care with
1527 // checking order for some operations.
1528 static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1529  if (isSRA16(Op))
1530  return isSHL16(Op.getOperand(0));
1531  return DAG.ComputeNumSignBits(Op) == 17;
1532 }
1533 
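// Worked example (not part of the original file): an i32 value that is really
// a sign-extended 16-bit quantity has bits [31:15] all equal to bit 15, so
// ComputeNumSignBits reports at least 32 - 16 + 1 = 17 sign bits. For
// instance, 0xFFFF8000 (-32768) has 17 sign bits and satisfies isS16, while
// 0x00010000 (65536) has only 15 and does not.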
1534 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1535 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1536  switch (CC) {
1537  default: llvm_unreachable("Unknown condition code!");
1538  case ISD::SETNE: return ARMCC::NE;
1539  case ISD::SETEQ: return ARMCC::EQ;
1540  case ISD::SETGT: return ARMCC::GT;
1541  case ISD::SETGE: return ARMCC::GE;
1542  case ISD::SETLT: return ARMCC::LT;
1543  case ISD::SETLE: return ARMCC::LE;
1544  case ISD::SETUGT: return ARMCC::HI;
1545  case ISD::SETUGE: return ARMCC::HS;
1546  case ISD::SETULT: return ARMCC::LO;
1547  case ISD::SETULE: return ARMCC::LS;
1548  }
1549 }
1550 
1551 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1552 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1553  ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
1554  CondCode2 = ARMCC::AL;
1555  InvalidOnQNaN = true;
1556  switch (CC) {
1557  default: llvm_unreachable("Unknown FP condition!");
1558  case ISD::SETEQ:
1559  case ISD::SETOEQ:
1560  CondCode = ARMCC::EQ;
1561  InvalidOnQNaN = false;
1562  break;
1563  case ISD::SETGT:
1564  case ISD::SETOGT: CondCode = ARMCC::GT; break;
1565  case ISD::SETGE:
1566  case ISD::SETOGE: CondCode = ARMCC::GE; break;
1567  case ISD::SETOLT: CondCode = ARMCC::MI; break;
1568  case ISD::SETOLE: CondCode = ARMCC::LS; break;
1569  case ISD::SETONE:
1570  CondCode = ARMCC::MI;
1571  CondCode2 = ARMCC::GT;
1572  InvalidOnQNaN = false;
1573  break;
1574  case ISD::SETO: CondCode = ARMCC::VC; break;
1575  case ISD::SETUO: CondCode = ARMCC::VS; break;
1576  case ISD::SETUEQ:
1577  CondCode = ARMCC::EQ;
1578  CondCode2 = ARMCC::VS;
1579  InvalidOnQNaN = false;
1580  break;
1581  case ISD::SETUGT: CondCode = ARMCC::HI; break;
1582  case ISD::SETUGE: CondCode = ARMCC::PL; break;
1583  case ISD::SETLT:
1584  case ISD::SETULT: CondCode = ARMCC::LT; break;
1585  case ISD::SETLE:
1586  case ISD::SETULE: CondCode = ARMCC::LE; break;
1587  case ISD::SETNE:
1588  case ISD::SETUNE:
1589  CondCode = ARMCC::NE;
1590  InvalidOnQNaN = false;
1591  break;
1592  }
1593 }
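// Worked example (not part of the original file): SETUEQ ("unordered or
// equal") has no single ARM condition after a VFP compare, so the mapping
// above yields CondCode = EQ and CondCode2 = VS; the caller then predicates
// on either condition being true. A single-condition case such as SETOGT
// simply becomes GT with CondCode2 left as AL.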
1594 
1595 //===----------------------------------------------------------------------===//
1596 // Calling Convention Implementation
1597 //===----------------------------------------------------------------------===//
1598 
1599 /// getEffectiveCallingConv - Get the effective calling convention, taking into
1600 /// account presence of floating point hardware and calling convention
1601 /// limitations, such as support for variadic functions.
1602 CallingConv::ID
1603 ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1604  bool isVarArg) const {
1605  switch (CC) {
1606  default:
1607  report_fatal_error("Unsupported calling convention");
1609  case CallingConv::ARM_APCS:
1610  case CallingConv::GHC:
1611  return CC;
1615  case CallingConv::Swift:
1617  case CallingConv::C:
1618  if (!Subtarget->isAAPCS_ABI())
1619  return CallingConv::ARM_APCS;
1620  else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
1621  getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
1622  !isVarArg)
1623  return CallingConv::ARM_AAPCS_VFP;
1624  else
1625  return CallingConv::ARM_AAPCS;
1626  case CallingConv::Fast:
1628  if (!Subtarget->isAAPCS_ABI()) {
1629  if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1630  return CallingConv::Fast;
1631  return CallingConv::ARM_APCS;
1632  } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1633  return CallingConv::ARM_AAPCS_VFP;
1634  else
1635  return CallingConv::ARM_AAPCS;
1636  }
1637 }
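// Illustrative note (not part of the original file): under this mapping a
// plain C function on a hard-float AAPCS target with VFP2 (and not
// Thumb1-only) is lowered with CallingConv::ARM_AAPCS_VFP, a variadic C
// function falls back to CallingConv::ARM_AAPCS, and any function on an APCS
// target uses CallingConv::ARM_APCS.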
1638 
1639 CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1640  bool isVarArg) const {
1641  return CCAssignFnForNode(CC, false, isVarArg);
1642 }
1643 
1644 CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
1645  bool isVarArg) const {
1646  return CCAssignFnForNode(CC, true, isVarArg);
1647 }
1648 
1649 /// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1650 /// CallingConvention.
1651 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1652  bool Return,
1653  bool isVarArg) const {
1654  switch (getEffectiveCallingConv(CC, isVarArg)) {
1655  default:
1656  report_fatal_error("Unsupported calling convention");
1657  case CallingConv::ARM_APCS:
1658  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1659  case CallingConv::ARM_AAPCS:
1660  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1661  case CallingConv::ARM_AAPCS_VFP:
1662  return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1663  case CallingConv::Fast:
1664  return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1665  case CallingConv::GHC:
1666  return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1667  case CallingConv::PreserveMost:
1668  return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1669  }
1670 }
1671 
1672 /// LowerCallResult - Lower the result values of a call into the
1673 /// appropriate copies out of appropriate physical registers.
1674 SDValue ARMTargetLowering::LowerCallResult(
1675  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1676  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1677  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1678  SDValue ThisVal) const {
1679  // Assign locations to each value returned by this call.
1680  SmallVector<CCValAssign, 16> RVLocs;
1681  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1682  *DAG.getContext());
1683  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1684 
1685  // Copy all of the result registers out of their specified physreg.
1686  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1687  CCValAssign VA = RVLocs[i];
1688 
1689  // Pass 'this' value directly from the argument to return value, to avoid
1690  // reg unit interference
1691  if (i == 0 && isThisReturn) {
1692  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1693  "unexpected return calling convention register assignment");
1694  InVals.push_back(ThisVal);
1695  continue;
1696  }
1697 
1698  SDValue Val;
1699  if (VA.needsCustom()) {
1700  // Handle f64 or half of a v2f64.
1701  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1702  InFlag);
1703  Chain = Lo.getValue(1);
1704  InFlag = Lo.getValue(2);
1705  VA = RVLocs[++i]; // skip ahead to next loc
1706  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1707  InFlag);
1708  Chain = Hi.getValue(1);
1709  InFlag = Hi.getValue(2);
1710  if (!Subtarget->isLittle())
1711  std::swap (Lo, Hi);
1712  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1713 
1714  if (VA.getLocVT() == MVT::v2f64) {
1715  SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1716  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1717  DAG.getConstant(0, dl, MVT::i32));
1718 
1719  VA = RVLocs[++i]; // skip ahead to next loc
1720  Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1721  Chain = Lo.getValue(1);
1722  InFlag = Lo.getValue(2);
1723  VA = RVLocs[++i]; // skip ahead to next loc
1724  Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1725  Chain = Hi.getValue(1);
1726  InFlag = Hi.getValue(2);
1727  if (!Subtarget->isLittle())
1728  std::swap (Lo, Hi);
1729  Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1730  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1731  DAG.getConstant(1, dl, MVT::i32));
1732  }
1733  } else {
1734  Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1735  InFlag);
1736  Chain = Val.getValue(1);
1737  InFlag = Val.getValue(2);
1738  }
1739 
1740  switch (VA.getLocInfo()) {
1741  default: llvm_unreachable("Unknown loc info!");
1742  case CCValAssign::Full: break;
1743  case CCValAssign::BCvt:
1744  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1745  break;
1746  }
1747 
1748  InVals.push_back(Val);
1749  }
1750 
1751  return Chain;
1752 }
1753 
1754 /// LowerMemOpCallTo - Store the argument to the stack.
1755 SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1756  SDValue Arg, const SDLoc &dl,
1757  SelectionDAG &DAG,
1758  const CCValAssign &VA,
1759  ISD::ArgFlagsTy Flags) const {
1760  unsigned LocMemOffset = VA.getLocMemOffset();
1761  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1762  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1763  StackPtr, PtrOff);
1764  return DAG.getStore(
1765  Chain, dl, Arg, PtrOff,
1766  MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
1767 }
1768 
1769 void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
1770  SDValue Chain, SDValue &Arg,
1771  RegsToPassVector &RegsToPass,
1772  CCValAssign &VA, CCValAssign &NextVA,
1773  SDValue &StackPtr,
1774  SmallVectorImpl<SDValue> &MemOpChains,
1775  ISD::ArgFlagsTy Flags) const {
1776  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1777  DAG.getVTList(MVT::i32, MVT::i32), Arg);
1778  unsigned id = Subtarget->isLittle() ? 0 : 1;
1779  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1780 
1781  if (NextVA.isRegLoc())
1782  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1783  else {
1784  assert(NextVA.isMemLoc());
1785  if (!StackPtr.getNode())
1786  StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
1787  getPointerTy(DAG.getDataLayout()));
1788 
1789  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1790  dl, DAG, NextVA,
1791  Flags));
1792  }
1793 }
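// Illustrative note (not part of the original file): an f64 argument assigned
// to core registers is split by ARMISD::VMOVRRD into two i32 halves; on a
// little-endian subtarget the low half goes to VA's register and the high
// half to NextVA's register (the order flips on big-endian), and if NextVA is
// a stack slot the second half is instead stored relative to SP through
// LowerMemOpCallTo.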
1794 
1795 /// LowerCall - Lowering a call into a callseq_start <-
1796 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1797 /// nodes.
1798 SDValue
1799 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1800  SmallVectorImpl<SDValue> &InVals) const {
1801  SelectionDAG &DAG = CLI.DAG;
1802  SDLoc &dl = CLI.DL;
1803  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1804  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1805  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1806  SDValue Chain = CLI.Chain;
1807  SDValue Callee = CLI.Callee;
1808  bool &isTailCall = CLI.IsTailCall;
1809  CallingConv::ID CallConv = CLI.CallConv;
1810  bool doesNotRet = CLI.DoesNotReturn;
1811  bool isVarArg = CLI.IsVarArg;
1812 
1813  MachineFunction &MF = DAG.getMachineFunction();
1814  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1815  bool isThisReturn = false;
1816  bool isSibCall = false;
1817  auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
1818 
1819  // Disable tail calls if they're not supported.
1820  if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
1821  isTailCall = false;
1822 
1823  if (isTailCall) {
1824  // Check if it's really possible to do a tail call.
1825  isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1826  isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(),
1827  Outs, OutVals, Ins, DAG);
1828  if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
1829  report_fatal_error("failed to perform tail call elimination on a call "
1830  "site marked musttail");
1831  // We don't support GuaranteedTailCallOpt for ARM, only automatically
1832  // detected sibcalls.
1833  if (isTailCall) {
1834  ++NumTailCalls;
1835  isSibCall = true;
1836  }
1837  }
1838 
1839  // Analyze operands of the call, assigning locations to each operand.
1840  SmallVector<CCValAssign, 16> ArgLocs;
1841  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1842  *DAG.getContext());
1843  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
1844 
1845  // Get a count of how many bytes are to be pushed on the stack.
1846  unsigned NumBytes = CCInfo.getNextStackOffset();
1847 
1848  // For tail calls, memory operands are available in our caller's stack.
1849  if (isSibCall)
1850  NumBytes = 0;
1851 
1852  // Adjust the stack pointer for the new arguments...
1853  // These operations are automatically eliminated by the prolog/epilog pass
1854  if (!isSibCall)
1855  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
1856 
1857  SDValue StackPtr =
1858  DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
1859 
1860  RegsToPassVector RegsToPass;
1861  SmallVector<SDValue, 8> MemOpChains;
1862 
1863  // Walk the register/memloc assignments, inserting copies/loads. In the case
1864  // of tail call optimization, arguments are handled later.
1865  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1866  i != e;
1867  ++i, ++realArgIdx) {
1868  CCValAssign &VA = ArgLocs[i];
1869  SDValue Arg = OutVals[realArgIdx];
1870  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1871  bool isByVal = Flags.isByVal();
1872 
1873  // Promote the value if needed.
1874  switch (VA.getLocInfo()) {
1875  default: llvm_unreachable("Unknown loc info!");
1876  case CCValAssign::Full: break;
1877  case CCValAssign::SExt:
1878  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1879  break;
1880  case CCValAssign::ZExt:
1881  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1882  break;
1883  case CCValAssign::AExt:
1884  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1885  break;
1886  case CCValAssign::BCvt:
1887  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1888  break;
1889  }
1890 
1891  // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1892  if (VA.needsCustom()) {
1893  if (VA.getLocVT() == MVT::v2f64) {
1894  SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1895  DAG.getConstant(0, dl, MVT::i32));
1896  SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1897  DAG.getConstant(1, dl, MVT::i32));
1898 
1899  PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1900  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1901 
1902  VA = ArgLocs[++i]; // skip ahead to next loc
1903  if (VA.isRegLoc()) {
1904  PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1905  VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1906  } else {
1907  assert(VA.isMemLoc());
1908 
1909  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1910  dl, DAG, VA, Flags));
1911  }
1912  } else {
1913  PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1914  StackPtr, MemOpChains, Flags);
1915  }
1916  } else if (VA.isRegLoc()) {
1917  if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
1918  Outs[0].VT == MVT::i32) {
1919  assert(VA.getLocVT() == MVT::i32 &&
1920  "unexpected calling convention register assignment");
1921  assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1922  "unexpected use of 'returned'");
1923  isThisReturn = true;
1924  }
1925  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1926  } else if (isByVal) {
1927  assert(VA.isMemLoc());
1928  unsigned offset = 0;
1929 
1930  // True if this byval aggregate will be split between registers
1931  // and memory.
1932  unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1933  unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
1934 
1935  if (CurByValIdx < ByValArgsCount) {
1936 
1937  unsigned RegBegin, RegEnd;
1938  CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1939 
1940  EVT PtrVT =
1941  getPointerTy(DAG.getDataLayout());
1942  unsigned int i, j;
1943  for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1944  SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
1945  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1946  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1947  MachinePointerInfo(),
1948  DAG.InferPtrAlignment(AddArg));
1949  MemOpChains.push_back(Load.getValue(1));
1950  RegsToPass.push_back(std::make_pair(j, Load));
1951  }
1952 
1953  // If the parameter size exceeds the register area, the "offset" value
1954  // helps us compute the stack slot for the remaining part correctly.
1955  offset = RegEnd - RegBegin;
1956 
1957  CCInfo.nextInRegsParam();
1958  }
1959 
1960  if (Flags.getByValSize() > 4*offset) {
1961  auto PtrVT = getPointerTy(DAG.getDataLayout());
1962  unsigned LocMemOffset = VA.getLocMemOffset();
1963  SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1964  SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
1965  SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
1966  SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
1967  SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
1968  MVT::i32);
1969  SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
1970  MVT::i32);
1971 
1972  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1973  SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1974  MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1975  Ops));
1976  }
1977  } else if (!isSibCall) {
1978  assert(VA.isMemLoc());
1979 
1980  MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1981  dl, DAG, VA, Flags));
1982  }
1983  }
1984 
1985  if (!MemOpChains.empty())
1986  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1987 
1988  // Build a sequence of copy-to-reg nodes chained together with token chain
1989  // and flag operands which copy the outgoing args into the appropriate regs.
1990  SDValue InFlag;
1991  // Tail call byval lowering might overwrite argument registers so in case of
1992  // tail call optimization the copies to registers are lowered later.
1993  if (!isTailCall)
1994  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1995  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1996  RegsToPass[i].second, InFlag);
1997  InFlag = Chain.getValue(1);
1998  }
1999 
2000  // For tail calls lower the arguments to the 'real' stack slot.
2001  if (isTailCall) {
2002  // Force all the incoming stack arguments to be loaded from the stack
2003  // before any new outgoing arguments are stored to the stack, because the
2004  // outgoing stack slots may alias the incoming argument stack slots, and
2005  // the alias isn't otherwise explicit. This is slightly more conservative
2006  // than necessary, because it means that each store effectively depends
2007  // on every argument instead of just those arguments it would clobber.
2008 
2009  // Do not flag preceding copytoreg stuff together with the following stuff.
2010  InFlag = SDValue();
2011  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2012  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2013  RegsToPass[i].second, InFlag);
2014  InFlag = Chain.getValue(1);
2015  }
2016  InFlag = SDValue();
2017  }
2018 
2019  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2020  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2021  // node so that legalize doesn't hack it.
2022  bool isDirect = false;
2023 
2024  const TargetMachine &TM = getTargetMachine();
2025  const Module *Mod = MF.getFunction().getParent();
2026  const GlobalValue *GV = nullptr;
2027  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2028  GV = G->getGlobal();
2029  bool isStub =
2030  !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2031 
2032  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2033  bool isLocalARMFunc = false;
2034  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2035  auto PtrVt = getPointerTy(DAG.getDataLayout());
2036 
2037  if (Subtarget->genLongCalls()) {
2038  assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2039  "long-calls codegen is not position independent!");
2040  // Handle a global address or an external symbol. If it's not one of
2041  // those, the target's already in a register, so we don't need to do
2042  // anything extra.
2043  if (isa<GlobalAddressSDNode>(Callee)) {
2044  // Create a constant pool entry for the callee address
2045  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2046  ARMConstantPoolValue *CPV =
2047  ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2048 
2049  // Get the address of the callee into a register
2050  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2051  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2052  Callee = DAG.getLoad(
2053  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2054  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2055  } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2056  const char *Sym = S->getSymbol();
2057 
2058  // Create a constant pool entry for the callee address
2059  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2060  ARMConstantPoolValue *CPV =
2061  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2062  ARMPCLabelIndex, 0);
2063  // Get the address of the callee into a register
2064  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2065  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2066  Callee = DAG.getLoad(
2067  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2068  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2069  }
2070  } else if (isa<GlobalAddressSDNode>(Callee)) {
2071  // If we're optimizing for minimum size and the function is called three or
2072  // more times in this block, we can improve codesize by calling indirectly
2073  // as BLXr has a 16-bit encoding.
2074  auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2075  auto *BB = CLI.CS.getParent();
2076  bool PreferIndirect =
2077  Subtarget->isThumb() && Subtarget->optForMinSize() &&
2078  count_if(GV->users(), [&BB](const User *U) {
2079  return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
2080  }) > 2;
2081 
2082  if (!PreferIndirect) {
2083  isDirect = true;
2084  bool isDef = GV->isStrongDefinitionForLinker();
2085 
2086  // ARM call to a local ARM function is predicable.
2087  isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2088  // tBX takes a register source operand.
2089  if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2090  assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2091  Callee = DAG.getNode(
2092  ARMISD::WrapperPIC, dl, PtrVt,
2093  DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2094  Callee = DAG.getLoad(
2095  PtrVt, dl, DAG.getEntryNode(), Callee,
2096  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2097  /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2098  MachineMemOperand::MOInvariant);
2099  } else if (Subtarget->isTargetCOFF()) {
2100  assert(Subtarget->isTargetWindows() &&
2101  "Windows is the only supported COFF target");
2102  unsigned TargetFlags = GV->hasDLLImportStorageClass()
2103  ? ARMII::MO_DLLIMPORT
2104  : ARMII::MO_NO_FLAG;
2105  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
2106  TargetFlags);
2107  if (GV->hasDLLImportStorageClass())
2108  Callee =
2109  DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2110  DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2111  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2112  } else {
2113  Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2114  }
2115  }
2116  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2117  isDirect = true;
2118  // tBX takes a register source operand.
2119  const char *Sym = S->getSymbol();
2120  if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2121  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2122  ARMConstantPoolValue *CPV =
2123  ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2124  ARMPCLabelIndex, 4);
2125  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2126  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2127  Callee = DAG.getLoad(
2128  PtrVt, dl, DAG.getEntryNode(), CPAddr,
2129  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2130  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2131  Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2132  } else {
2133  Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2134  }
2135  }
2136 
2137  // FIXME: handle tail calls differently.
2138  unsigned CallOpc;
2139  if (Subtarget->isThumb()) {
2140  if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2141  CallOpc = ARMISD::CALL_NOLINK;
2142  else
2143  CallOpc = ARMISD::CALL;
2144  } else {
2145  if (!isDirect && !Subtarget->hasV5TOps())
2146  CallOpc = ARMISD::CALL_NOLINK;
2147  else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2148  // Emit regular call when code size is the priority
2149  !Subtarget->optForMinSize())
2150  // "mov lr, pc; b _foo" to avoid confusing the RSP
2151  CallOpc = ARMISD::CALL_NOLINK;
2152  else
2153  CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2154  }
2155 
2156  std::vector<SDValue> Ops;
2157  Ops.push_back(Chain);
2158  Ops.push_back(Callee);
2159 
2160  // Add argument registers to the end of the list so that they are known live
2161  // into the call.
2162  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2163  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2164  RegsToPass[i].second.getValueType()));
2165 
2166  // Add a register mask operand representing the call-preserved registers.
2167  if (!isTailCall) {
2168  const uint32_t *Mask;
2169  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2170  if (isThisReturn) {
2171  // For 'this' returns, use the R0-preserving mask if applicable
2172  Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2173  if (!Mask) {
2174  // Set isThisReturn to false if the calling convention is not one that
2175  // allows 'returned' to be modeled in this way, so LowerCallResult does
2176  // not try to pass 'this' straight through
2177  isThisReturn = false;
2178  Mask = ARI->getCallPreservedMask(MF, CallConv);
2179  }
2180  } else
2181  Mask = ARI->getCallPreservedMask(MF, CallConv);
2182 
2183  assert(Mask && "Missing call preserved mask for calling convention");
2184  Ops.push_back(DAG.getRegisterMask(Mask));
2185  }
2186 
2187  if (InFlag.getNode())
2188  Ops.push_back(InFlag);
2189 
2190  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2191  if (isTailCall) {
2192  MF.getFrameInfo().setHasTailCall();
2193  return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2194  }
2195 
2196  // Returns a chain and a flag for retval copy to use.
2197  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2198  InFlag = Chain.getValue(1);
2199 
2200  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2201  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2202  if (!Ins.empty())
2203  InFlag = Chain.getValue(1);
2204 
2205  // Handle result values, copying them out of physregs into vregs that we
2206  // return.
2207  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2208  InVals, isThisReturn,
2209  isThisReturn ? OutVals[0] : SDValue());
2210 }
2211 
2212 /// HandleByVal - Every parameter *after* a byval parameter is passed
2213 /// on the stack. Remember the next parameter register to allocate,
2214 /// and then confiscate the rest of the parameter registers to ensure
2215 /// this.
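///
/// For example (illustrative, assuming NSAA == SP): a 20-byte byval argument
/// with 4-byte alignment whose first free register is r1 is assigned r1-r3
/// below, and Size is truncated to the 8 remaining bytes that go on the stack.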
2216 void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2217  unsigned Align) const {
2218  // Byval (as with any stack) slots are always at least 4 byte aligned.
2219  Align = std::max(Align, 4U);
2220 
2221  unsigned Reg = State->AllocateReg(GPRArgRegs);
2222  if (!Reg)
2223  return;
2224 
2225  unsigned AlignInRegs = Align / 4;
2226  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2227  for (unsigned i = 0; i < Waste; ++i)
2228  Reg = State->AllocateReg(GPRArgRegs);
2229 
2230  if (!Reg)
2231  return;
2232 
2233  unsigned Excess = 4 * (ARM::R4 - Reg);
2234 
2235  // Special case when NSAA != SP and the parameter size is greater than the
2236  // size of all remaining GPR regs. In that case we can't split the parameter;
2237  // we must send it entirely to the stack. We also must set the NCRN to R4,
2238  // so that all remaining registers are wasted.
2239  const unsigned NSAAOffset = State->getNextStackOffset();
2240  if (NSAAOffset != 0 && Size > Excess) {
2241  while (State->AllocateReg(GPRArgRegs))
2242  ;
2243  return;
2244  }
2245 
2246  // The first register for the byval parameter is the first register that
2247  // wasn't allocated before this method call, i.e. "Reg".
2248  // If the parameter is small enough to be saved in the range [Reg, r4), then
2249  // the end (first-past-last) register would be Reg + param-size-in-regs;
2250  // otherwise the parameter would be split between registers and stack, and
2251  // the end register would be r4 in that case.
2252  unsigned ByValRegBegin = Reg;
2253  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2254  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2255  // Note, the first register was already allocated at the beginning of this
2256  // function; allocate the remaining registers we need.
2257  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2258  State->AllocateReg(GPRArgRegs);
2259  // A byval parameter that is split between registers and memory needs its
2260  // size truncated here.
2261  // In the case where the entire structure fits in registers, we set the
2262  // size in memory to zero.
2263  Size = std::max<int>(Size - Excess, 0);
2264 }
2265 
2266 /// MatchingStackOffset - Return true if the given stack call argument is
2267 /// already available in the same position (relatively) of the caller's
2268 /// incoming argument stack.
2269 static
2270 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2271  MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2272  const TargetInstrInfo *TII) {
2273  unsigned Bytes = Arg.getValueSizeInBits() / 8;
2274  int FI = std::numeric_limits<int>::max();
2275  if (Arg.getOpcode() == ISD::CopyFromReg) {
2276  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2277  if (!TargetRegisterInfo::isVirtualRegister(VR))
2278  return false;
2279  MachineInstr *Def = MRI->getVRegDef(VR);
2280  if (!Def)
2281  return false;
2282  if (!Flags.isByVal()) {
2283  if (!TII->isLoadFromStackSlot(*Def, FI))
2284  return false;
2285  } else {
2286  return false;
2287  }
2288  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2289  if (Flags.isByVal())
2290  // ByVal argument is passed in as a pointer but it's now being
2291  // dereferenced. e.g.
2292  // define @foo(%struct.X* %A) {
2293  // tail call @bar(%struct.X* byval %A)
2294  // }
2295  return false;
2296  SDValue Ptr = Ld->getBasePtr();
2297  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2298  if (!FINode)
2299  return false;
2300  FI = FINode->getIndex();
2301  } else
2302  return false;
2303 
2304  assert(FI != std::numeric_limits<int>::max());
2305  if (!MFI.isFixedObjectIndex(FI))
2306  return false;
2307  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2308 }
2309 
2310 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2311 /// for tail call optimization. Targets which want to do tail call
2312 /// optimization should implement this function.
2313 bool
2314 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2315  CallingConv::ID CalleeCC,
2316  bool isVarArg,
2317  bool isCalleeStructRet,
2318  bool isCallerStructRet,
2319  const SmallVectorImpl<ISD::OutputArg> &Outs,
2320  const SmallVectorImpl<SDValue> &OutVals,
2321  const SmallVectorImpl<ISD::InputArg> &Ins,
2322  SelectionDAG& DAG) const {
2323  MachineFunction &MF = DAG.getMachineFunction();
2324  const Function &CallerF = MF.getFunction();
2325  CallingConv::ID CallerCC = CallerF.getCallingConv();
2326 
2327  assert(Subtarget->supportsTailCall());
2328 
2329  // Tail calls to function pointers cannot be optimized for Thumb1 if the args
2330  // to the call take up r0-r3. The reason is that there are no legal registers
2331  // left to hold the pointer to the function to be called.
2332  if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2333  !isa<GlobalAddressSDNode>(Callee.getNode()))
2334  return false;
2335 
2336  // Look for obvious safe cases to perform tail call optimization that do not
2337  // require ABI changes. This is what gcc calls sibcall.
2338 
2339  // Exception-handling functions need a special set of instructions to indicate
2340  // a return to the hardware. Tail-calling another function would probably
2341  // break this.
2342  if (CallerF.hasFnAttribute("interrupt"))
2343  return false;
2344 
2345  // Also avoid sibcall optimization if either caller or callee uses struct
2346  // return semantics.
2347  if (isCalleeStructRet || isCallerStructRet)
2348  return false;
2349 
2350  // Externally-defined functions with weak linkage should not be
2351  // tail-called on ARM when the OS does not support dynamic
2352  // pre-emption of symbols, as the AAELF spec requires normal calls
2353  // to undefined weak functions to be replaced with a NOP or jump to the
2354  // next instruction. The behaviour of branch instructions in this
2355  // situation (as used for tail calls) is implementation-defined, so we
2356  // cannot rely on the linker replacing the tail call with a return.
2357  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2358  const GlobalValue *GV = G->getGlobal();
2359  const Triple &TT = getTargetMachine().getTargetTriple();
2360  if (GV->hasExternalWeakLinkage() &&
2361  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2362  return false;
2363  }
2364 
2365  // Check that the call results are passed in the same way.
2366  LLVMContext &C = *DAG.getContext();
2367  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2368  CCAssignFnForReturn(CalleeCC, isVarArg),
2369  CCAssignFnForReturn(CallerCC, isVarArg)))
2370  return false;
2371  // The callee has to preserve all registers the caller needs to preserve.
2372  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2373  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2374  if (CalleeCC != CallerCC) {
2375  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2376  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2377  return false;
2378  }
2379 
2380  // If the caller's vararg or byval argument has been split between registers
2381  // and stack, do not perform the tail call, since part of the argument is in
2382  // the caller's local frame.
2383  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2384  if (AFI_Caller->getArgRegsSaveSize())
2385  return false;
2386 
2387  // If the callee takes no arguments then go on to check the results of the
2388  // call.
2389  if (!Outs.empty()) {
2390  // Check if stack adjustment is needed. For now, do not do this if any
2391  // argument is passed on the stack.
2392  SmallVector<CCValAssign, 16> ArgLocs;
2393  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2394  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2395  if (CCInfo.getNextStackOffset()) {
2396  // Check if the arguments are already laid out in the right way as
2397  // the caller's fixed stack objects.
2398  MachineFrameInfo &MFI = MF.getFrameInfo();
2399  const MachineRegisterInfo *MRI = &MF.getRegInfo();
2400  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2401  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2402  i != e;
2403  ++i, ++realArgIdx) {
2404  CCValAssign &VA = ArgLocs[i];
2405  EVT RegVT = VA.getLocVT();
2406  SDValue Arg = OutVals[realArgIdx];
2407  ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2408  if (VA.getLocInfo() == CCValAssign::Indirect)
2409  return false;
2410  if (VA.needsCustom()) {
2411  // f64 and vector types are split into multiple registers or
2412  // register/stack-slot combinations. The types will not match
2413  // the registers; give up on memory f64 refs until we figure
2414  // out what to do about this.
2415  if (!VA.isRegLoc())
2416  return false;
2417  if (!ArgLocs[++i].isRegLoc())
2418  return false;
2419  if (RegVT == MVT::v2f64) {
2420  if (!ArgLocs[++i].isRegLoc())
2421  return false;
2422  if (!ArgLocs[++i].isRegLoc())
2423  return false;
2424  }
2425  } else if (!VA.isRegLoc()) {
2426  if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2427  MFI, MRI, TII))
2428  return false;
2429  }
2430  }
2431  }
2432 
2433  const MachineRegisterInfo &MRI = MF.getRegInfo();
2434  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2435  return false;
2436  }
2437 
2438  return true;
2439 }
2440 
2441 bool
2442 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2443  MachineFunction &MF, bool isVarArg,
2444  const SmallVectorImpl<ISD::OutputArg> &Outs,
2445  LLVMContext &Context) const {
2446  SmallVector<CCValAssign, 16> RVLocs;
2447  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2448  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2449 }
2450 
2451 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2452  const SDLoc &DL, SelectionDAG &DAG) {
2453  const MachineFunction &MF = DAG.getMachineFunction();
2454  const Function &F = MF.getFunction();
2455 
2456  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
2457 
2458  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2459  // version of the "preferred return address". These offsets affect the return
2460  // instruction if this is a return from PL1 without hypervisor extensions.
2461  // IRQ/FIQ: +4 "subs pc, lr, #4"
2462  // SWI: 0 "subs pc, lr, #0"
2463  // ABORT: +4 "subs pc, lr, #4"
2464  // UNDEF: +4/+2 "subs pc, lr, #0"
2465  // UNDEF varies depending on where the exception came from ARM or Thumb
2466  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
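  //
  // For example (illustrative): a handler declared with
  // __attribute__((interrupt("IRQ"))) therefore returns with "subs pc, lr, #4"
  // instead of a plain "bx lr", via the ARMISD::INTRET_FLAG node built below.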
2467 
2468  int64_t LROffset;
2469  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2470  IntKind == "ABORT")
2471  LROffset = 4;
2472  else if (IntKind == "SWI" || IntKind == "UNDEF")
2473  LROffset = 0;
2474  else
2475  report_fatal_error("Unsupported interrupt attribute. If present, value "
2476  "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2477 
2478  RetOps.insert(RetOps.begin() + 1,
2479  DAG.getConstant(LROffset, DL, MVT::i32, false));
2480 
2481  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2482 }
2483 
2484 SDValue
2485 ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2486  bool isVarArg,
2487  const SmallVectorImpl<ISD::OutputArg> &Outs,
2488  const SmallVectorImpl<SDValue> &OutVals,
2489  const SDLoc &dl, SelectionDAG &DAG) const {
2490  // CCValAssign - represent the assignment of the return value to a location.
2491  SmallVector<CCValAssign, 16> RVLocs;
2492 
2493  // CCState - Info about the registers and stack slots.
2494  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2495  *DAG.getContext());
2496 
2497  // Analyze outgoing return values.
2498  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2499 
2500  SDValue Flag;
2501  SmallVector<SDValue, 4> RetOps;
2502  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2503  bool isLittleEndian = Subtarget->isLittle();
2504 
2505  MachineFunction &MF = DAG.getMachineFunction();
2506  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2507  AFI->setReturnRegsCount(RVLocs.size());
2508 
2509  // Copy the result values into the output registers.
2510  for (unsigned i = 0, realRVLocIdx = 0;
2511  i != RVLocs.size();
2512  ++i, ++realRVLocIdx) {
2513  CCValAssign &VA = RVLocs[i];
2514  assert(VA.isRegLoc() && "Can only return in registers!");
2515 
2516  SDValue Arg = OutVals[realRVLocIdx];
2517  bool ReturnF16 = false;
2518 
2519  if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
2520  // Half-precision return values can be returned like this:
2521  //
2522  // t11: f16 = fadd ...
2523  // t12: i16 = bitcast t11
2524  // t13: i32 = zero_extend t12
2525  // t14: f32 = bitcast t13 <~~~~~~~ Arg
2526  //
2527  // to avoid code generation for bitcasts, we simply set Arg to the node
2528  // that produces the f16 value, t11 in this case.
2529  //
2530  if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
2531  SDValue ZE = Arg.getOperand(0);
2532  if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
2533  SDValue BC = ZE.getOperand(0);
2534  if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
2535  Arg = BC.getOperand(0);
2536  ReturnF16 = true;
2537  }
2538  }
2539  }
2540  }
2541 
2542  switch (VA.getLocInfo()) {
2543  default: llvm_unreachable("Unknown loc info!");
2544  case CCValAssign::Full: break;
2545  case CCValAssign::BCvt:
2546  if (!ReturnF16)
2547  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2548  break;
2549  }
2550 
2551  if (VA.needsCustom()) {
2552  if (VA.getLocVT() == MVT::v2f64) {
2553  // Extract the first half and return it in two registers.
2554  SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2555  DAG.getConstant(0, dl, MVT::i32));
2556  SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2557  DAG.getVTList(MVT::i32, MVT::i32), Half);
2558 
2559  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2560  HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2561  Flag);
2562  Flag = Chain.getValue(1);
2563  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2564  VA = RVLocs[++i]; // skip ahead to next loc
2565  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2566  HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2567  Flag);
2568  Flag = Chain.getValue(1);
2569  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2570  VA = RVLocs[++i]; // skip ahead to next loc
2571 
2572  // Extract the 2nd half and fall through to handle it as an f64 value.
2573  Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2574  DAG.getConstant(1, dl, MVT::i32));
2575  }
2576  // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2577  // available.
2578  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2579  DAG.getVTList(MVT::i32, MVT::i32), Arg);
2580  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2581  fmrrd.getValue(isLittleEndian ? 0 : 1),
2582  Flag);
2583  Flag = Chain.getValue(1);
2584  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2585  VA = RVLocs[++i]; // skip ahead to next loc
2586  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2587  fmrrd.getValue(isLittleEndian ? 1 : 0),
2588  Flag);
2589  } else
2590  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2591 
2592  // Guarantee that all emitted copies are
2593  // stuck together, avoiding something bad.
2594  Flag = Chain.getValue(1);
2595  RetOps.push_back(DAG.getRegister(VA.getLocReg(),
2596  ReturnF16 ? MVT::f16 : VA.getLocVT()));
2597  }
2598  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2599  const MCPhysReg *I =
2600  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2601  if (I) {
2602  for (; *I; ++I) {
2603  if (ARM::GPRRegClass.contains(*I))
2604  RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2605  else if (ARM::DPRRegClass.contains(*I))
2606  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2607  else
2608  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2609  }
2610  }
2611 
2612  // Update chain and glue.
2613  RetOps[0] = Chain;
2614  if (Flag.getNode())
2615  RetOps.push_back(Flag);
2616 
2617  // CPUs which aren't M-class use a special sequence to return from
2618  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2619  // though we use "subs pc, lr, #N").
2620  //
2621  // M-class CPUs actually use a normal return sequence with a special
2622  // (hardware-provided) value in LR, so the normal code path works.
2623  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
2624  !Subtarget->isMClass()) {
2625  if (Subtarget->isThumb1Only())
2626  report_fatal_error("interrupt attribute is not supported in Thumb1");
2627  return LowerInterruptReturn(RetOps, dl, DAG);
2628  }
2629 
2630  return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2631 }
2632 
2633 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2634  if (N->getNumValues() != 1)
2635  return false;
2636  if (!N->hasNUsesOfValue(1, 0))
2637  return false;
2638 
2639  SDValue TCChain = Chain;
2640  SDNode *Copy = *N->use_begin();
2641  if (Copy->getOpcode() == ISD::CopyToReg) {
2642  // If the copy has a glue operand, we conservatively assume it isn't safe to
2643  // perform a tail call.
2644  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2645  return false;
2646  TCChain = Copy->getOperand(0);
2647  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2648  SDNode *VMov = Copy;
2649  // f64 returned in a pair of GPRs.
2650  SmallPtrSet<SDNode*, 2> Copies;
2651  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2652  UI != UE; ++UI) {
2653  if (UI->getOpcode() != ISD::CopyToReg)
2654  return false;
2655  Copies.insert(*UI);
2656  }
2657  if (Copies.size() > 2)
2658  return false;
2659 
2660  for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2661  UI != UE; ++UI) {
2662  SDValue UseChain = UI->getOperand(0);
2663  if (Copies.count(UseChain.getNode()))
2664  // Second CopyToReg
2665  Copy = *UI;
2666  else {
2667  // We are at the top of this chain.
2668  // If the copy has a glue operand, we conservatively assume it
2669  // isn't safe to perform a tail call.
2670  if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2671  return false;
2672  // First CopyToReg
2673  TCChain = UseChain;
2674  }
2675  }
2676  } else if (Copy->getOpcode() == ISD::BITCAST) {
2677  // f32 returned in a single GPR.
2678  if (!Copy->hasOneUse())
2679  return false;
2680  Copy = *Copy->use_begin();
2681  if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2682  return false;
2683  // If the copy has a glue operand, we conservatively assume it isn't safe to
2684  // perform a tail call.
2685  if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2686  return false;
2687  TCChain = Copy->getOperand(0);
2688  } else {
2689  return false;
2690  }
2691 
2692  bool HasRet = false;
2693  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2694  UI != UE; ++UI) {
2695  if (UI->getOpcode() != ARMISD::RET_FLAG &&
2696  UI->getOpcode() != ARMISD::INTRET_FLAG)
2697  return false;
2698  HasRet = true;
2699  }
2700 
2701  if (!HasRet)
2702  return false;
2703 
2704  Chain = TCChain;
2705  return true;
2706 }
2707 
2708 bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2709  if (!Subtarget->supportsTailCall())
2710  return false;
2711 
2712  auto Attr =
2713  CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2714  if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2715  return false;
2716 
2717  return true;
2718 }
2719 
2720 // Trying to write a 64-bit value, so we need to split it into two 32-bit
2721 // values first, and pass the low and high parts through.
2722 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2723  SDLoc DL(Op);
2724  SDValue WriteValue = Op->getOperand(2);
2725 
2726  // This function is only supposed to be called for i64 type argument.
2727  assert(WriteValue.getValueType() == MVT::i64
2728  && "LowerWRITE_REGISTER called for non-i64 type argument.");
2729 
2730  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2731  DAG.getConstant(0, DL, MVT::i32));
2732  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2733  DAG.getConstant(1, DL, MVT::i32));
2734  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2735  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2736 }
2737 
2738 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2739 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2740 // one of the above mentioned nodes. It has to be wrapped because otherwise
2741 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2742 // be used to form addressing mode. These wrapped nodes will be selected
2743 // into MOVi.
2744 SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
2745  SelectionDAG &DAG) const {
2746  EVT PtrVT = Op.getValueType();
2747  // FIXME there is no actual debug info here
2748  SDLoc dl(Op);
2749  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2750  SDValue Res;
2751 
2752  // When generating execute-only code Constant Pools must be promoted to the
2753  // global data section. It's a bit ugly that we can't share them across basic
2754  // blocks, but this way we guarantee that execute-only behaves correctly with
2755  // position-independent addressing modes.
2756  if (Subtarget->genExecuteOnly()) {
2757  auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2758  auto T = const_cast<Type*>(CP->getType());
2759  auto C = const_cast<Constant*>(CP->getConstVal());
2760  auto M = const_cast<Module*>(DAG.getMachineFunction().
2761  getFunction().getParent());
2762  auto GV = new GlobalVariable(
2763  *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
2764  Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
2765  Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
2766  Twine(AFI->createPICLabelUId())
2767  );
2768  SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
2769  dl, PtrVT);
2770  return LowerGlobalAddress(GA, DAG);
2771  }
2772 
2773  if (CP->isMachineConstantPoolEntry())
2774  Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2775  CP->getAlignment());
2776  else
2777  Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2778  CP->getAlignment());
2779  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2780 }
2781 
2782 unsigned ARMTargetLowering::getJumpTableEncoding() const {
2783  return MachineJumpTableInfo::EK_Inline;
2784 }
2785 
2786 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2787  SelectionDAG &DAG) const {
2788  MachineFunction &MF = DAG.getMachineFunction();
2789  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2790  unsigned ARMPCLabelIndex = 0;
2791  SDLoc DL(Op);
2792  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2793  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2794  SDValue CPAddr;
2795  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2796  if (!IsPositionIndependent) {
2797  CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2798  } else {
2799  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2800  ARMPCLabelIndex = AFI->createPICLabelUId();
2801  ARMConstantPoolValue *CPV =
2802  ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2803  ARMCP::CPBlockAddress, PCAdj);
2804  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2805  }
2806  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2807  SDValue Result = DAG.getLoad(
2808  PtrVT, DL, DAG.getEntryNode(), CPAddr,
2809  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2810  if (!IsPositionIndependent)
2811  return Result;
2812  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2813  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2814 }
2815 
2816 /// Convert a TLS address reference into the correct sequence of loads
2817 /// and calls to compute the variable's address for Darwin, and return an
2818 /// SDValue containing the final node.
2819 
2820 /// Darwin only has one TLS scheme which must be capable of dealing with the
2821 /// fully general situation, in the worst case. This means:
2822 /// + "extern __thread" declaration.
2823 /// + Defined in a possibly unknown dynamic library.
2824 ///
2825 /// The general system is that each __thread variable has a [3 x i32] descriptor
2826 /// which contains information used by the runtime to calculate the address. The
2827 /// only part of this the compiler needs to know about is the first word, which
2828 /// contains a function pointer that must be called with the address of the
2829 /// entire descriptor in "r0".
2830 ///
2831 /// Since this descriptor may be in a different unit, in general access must
2832 /// proceed along the usual ARM rules. A common sequence to produce is:
2833 ///
2834 /// movw rT1, :lower16:_var$non_lazy_ptr
2835 /// movt rT1, :upper16:_var$non_lazy_ptr
2836 /// ldr r0, [rT1]
2837 /// ldr rT2, [r0]
2838 /// blx rT2
2839 /// [...address now in r0...]
2840 SDValue
2841 ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
2842  SelectionDAG &DAG) const {
2843  assert(Subtarget->isTargetDarwin() &&
2844  "This function expects a Darwin target");
2845  SDLoc DL(Op);
2846 
2847  // First step is to get the address of the actual global symbol. This is where
2848  // the TLS descriptor lives.
2849  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
2850 
2851  // The first entry in the descriptor is a function pointer that we must call
2852  // to obtain the address of the variable.
2853  SDValue Chain = DAG.getEntryNode();
2854  SDValue FuncTLVGet = DAG.getLoad(
2855  MVT::i32, DL, Chain, DescAddr,
2856  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2857  /* Alignment = */ 4,
2858  MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
2859  MachineMemOperand::MOInvariant);
2860  Chain = FuncTLVGet.getValue(1);
2861 
2862  MachineFunction &F = DAG.getMachineFunction();
2863  MachineFrameInfo &MFI = F.getFrameInfo();
2864  MFI.setAdjustsStack(true);
2865 
2866  // TLS calls preserve all registers except those that absolutely must be
2867  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
2868  // silly).
2869  auto TRI =
2870  getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
2871  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
2872  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
2873 
2874  // Finally, we can make the call. This is just a degenerate version of a
2875  // normal call node: r0 takes the address of the descriptor, and
2876  // returns the address of the variable in this thread.
2877  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
2878  Chain =
2879  DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
2880  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
2881  DAG.getRegisterMask(Mask), Chain.getValue(1));
2882  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
2883 }
2884 
2885 SDValue
2886 ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
2887  SelectionDAG &DAG) const {
2888  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
2889 
2890  SDValue Chain = DAG.getEntryNode();
2891  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2892  SDLoc DL(Op);
2893 
2894  // Load the current TEB (thread environment block)
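  // (Illustrative note, not from the original source: the intrinsic operands
  // below encode "mrc p15, #0, <Rt>, c13, c0, #2", i.e. a read of the CP15
  // software thread ID register TPIDRURW, which Windows uses to hold the TEB
  // pointer.)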
2895  SDValue Ops[] = {Chain,
2896  DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
2897  DAG.getConstant(15, DL, MVT::i32),
2898  DAG.getConstant(0, DL, MVT::i32),
2899  DAG.getConstant(13, DL, MVT::i32),
2900  DAG.getConstant(0, DL, MVT::i32),
2901  DAG.getConstant(2, DL, MVT::i32)};
2902  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
2903  DAG.getVTList(MVT::i32, MVT::Other), Ops);
2904 
2905  SDValue TEB = CurrentTEB.getValue(0);
2906  Chain = CurrentTEB.getValue(1);
2907 
2908  // Load the ThreadLocalStoragePointer from the TEB
2909  // A pointer to the TLS array is located at offset 0x2c from the TEB.
2910  SDValue TLSArray =
2911  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
2912  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
2913 
2914  // The pointer to the thread's TLS data area is found at the TLS index,
2915  // scaled by 4, as an offset into the TLSArray.
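  //
  // Putting the pieces together (illustrative):
  //   TLSArray = *(TEB + 0x2c)
  //   TLS base = *(TLSArray + _tls_index * 4)
  //   var addr = TLS base + SECREL(var)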
2916 
2917  // Load the TLS index from the C runtime
2918  SDValue TLSIndex =
2919  DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
2920  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
2921  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
2922 
2923  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
2924  DAG.getConstant(2, DL, MVT::i32));
2925  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
2926  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
2927  MachinePointerInfo());
2928 
2929  // Get the offset of the start of the .tls section (section base)
2930  const auto *GA = cast<GlobalAddressSDNode>(Op);
2931  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
2932  SDValue Offset = DAG.getLoad(
2933  PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
2934  DAG.getTargetConstantPool(CPV, PtrVT, 4)),
2935  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2936 
2937  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
2938 }
2939 
2940 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
2941 SDValue
2942 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2943  SelectionDAG &DAG) const {
2944  SDLoc dl(GA);
2945  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2946  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2947  MachineFunction &MF = DAG.getMachineFunction();
2948  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2949  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2950  ARMConstantPoolValue *CPV =
2951  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2952  ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2953  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2954  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2955  Argument = DAG.getLoad(
2956  PtrVT, dl, DAG.getEntryNode(), Argument,
2957  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2958  SDValue Chain = Argument.getValue(1);
2959 
2960  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2961  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2962 
2963  // call __tls_get_addr.
2964  ArgListTy Args;
2965  ArgListEntry Entry;
2966  Entry.Node = Argument;
2967  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2968  Args.push_back(Entry);
2969 
2970  // FIXME: is there useful debug info available here?
2971  TargetLowering::CallLoweringInfo CLI(DAG);
2972  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2973  CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
2974  DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
2975 
2976  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2977  return CallResult.first;
2978 }
2979 
2980 // Lower ISD::GlobalTLSAddress using the "initial exec" or
2981 // "local exec" model.
2982 SDValue
2983 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2984  SelectionDAG &DAG,
2985  TLSModel::Model model) const {
2986  const GlobalValue *GV = GA->getGlobal();
2987  SDLoc dl(GA);
2988  SDValue Offset;
2989  SDValue Chain = DAG.getEntryNode();
2990  EVT PtrVT = getPointerTy(DAG.getDataLayout());
2991  // Get the Thread Pointer
2992  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2993 
2994  if (model == TLSModel::InitialExec) {
2995  MachineFunction &MF = DAG.getMachineFunction();
2996  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2997  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2998  // Initial exec model.
2999  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3000  ARMConstantPoolValue *CPV =
3001  ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3002  ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3003  true);
3004  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3005  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3006  Offset = DAG.getLoad(
3007  PtrVT, dl, Chain, Offset,
3008  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3009  Chain = Offset.getValue(1);
3010 
3011  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3012  Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3013 
3014  Offset = DAG.getLoad(
3015  PtrVT, dl, Chain, Offset,
3016  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3017  } else {
3018  // local exec model
3019  assert(model == TLSModel::LocalExec);
3020  ARMConstantPoolValue *CPV =
3021  ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3022  Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3023  Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3024  Offset = DAG.getLoad(
3025  PtrVT, dl, Chain, Offset,
3026  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3027  }
3028 
3029  // The address of the thread local variable is the add of the thread
3030  // pointer with the offset of the variable.
3031  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3032 }
3033 
3034 SDValue
3035 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3036  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3037  if (DAG.getTarget().useEmulatedTLS())
3038  return LowerToTLSEmulatedModel(GA, DAG);
3039 
3040  if (Subtarget->isTargetDarwin())
3041  return LowerGlobalTLSAddressDarwin(Op, DAG);
3042 
3043  if (Subtarget->isTargetWindows())
3044  return LowerGlobalTLSAddressWindows(Op, DAG);
3045 
3046  // TODO: implement the "local dynamic" model
3047  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3048  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3049 
3050  switch (model) {
3051  case TLSModel::GeneralDynamic:
3052  case TLSModel::LocalDynamic:
3053  return LowerToTLSGeneralDynamicModel(GA, DAG);
3054  case TLSModel::InitialExec:
3055  case TLSModel::LocalExec:
3056  return LowerToTLSExecModels(GA, DAG, model);
3057  }
3058  llvm_unreachable("bogus TLS model");
3059 }
3060 
3061 /// Return true if all users of V are within function F, looking through
3062 /// ConstantExprs.
3063 static bool allUsersAreInFunction(const Value *V, const Function *F) {
3064  SmallVector<const User*,4> Worklist;
3065  for (auto *U : V->users())
3066  Worklist.push_back(U);
3067  while (!Worklist.empty()) {
3068  auto *U = Worklist.pop_back_val();
3069  if (isa<ConstantExpr>(U)) {
3070  for (auto *UU : U->users())
3071  Worklist.push_back(UU);
3072  continue;
3073  }
3074 
3075  auto *I = dyn_cast<Instruction>(U);
3076  if (!I || I->getParent()->getParent() != F)
3077  return false;
3078  }
3079  return true;
3080 }
3081 
3082 static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3083  const GlobalValue *GV, SelectionDAG &DAG,
3084  EVT PtrVT, const SDLoc &dl) {
3085  // If we're creating a pool entry for a constant global with unnamed address,
3086  // and the global is small enough, we can emit it inline into the constant pool
3087  // to save ourselves an indirection.
3088  //
3089  // This is a win if the constant is only used in one function (so it doesn't
3090  // need to be duplicated) or duplicating the constant wouldn't increase code
3091  // size (implying the constant is no larger than 4 bytes).
3092  const Function &F = DAG.getMachineFunction().getFunction();
3093 
3094  // We rely on this decision to inline being idempotent and unrelated to the
3095  // use-site. We know that if we inline a variable at one use site, we'll
3096  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3097  // doesn't know about this optimization, so bail out if it's enabled; else
3098  // we could decide to inline here (and thus never emit the GV) while
3099  // fast-isel generated code would still require the GV.
3100  if (!EnableConstpoolPromotion ||
3101  DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3102  return SDValue();
3103 
3104  auto *GVar = dyn_cast<GlobalVariable>(GV);
3105  if (!GVar || !GVar->hasInitializer() ||
3106  !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3107  !GVar->hasLocalLinkage())
3108  return SDValue();
3109 
3110  // If we inline a value that contains relocations, we move the relocations
3111  // from .data to .text. This is not allowed in position-independent code.
3112  auto *Init = GVar->getInitializer();
3113  if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3114  Init->needsRelocation())
3115  return SDValue();
3116 
3117  // The constant islands pass can only really deal with alignment requests
3118  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3119  // any type wanting greater alignment requirements than 4 bytes. We also
3120  // can only promote constants that are multiples of 4 bytes in size or
3121  // are paddable to a multiple of 4. Currently we only try and pad constants
3122  // that are strings for simplicity.
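  //
  // For example (illustrative): a 6-byte string initializer gives
  // RequiredPadding == 4 - (6 % 4) == 2, so it is padded with two trailing
  // zero bytes to a PaddedSize of 8 below.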
3123  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3124  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3125  unsigned Align = DAG.getDataLayout().getPreferredAlignment(GVar);
3126  unsigned RequiredPadding = 4 - (Size % 4);
3127  bool PaddingPossible =
3128  RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3129  if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3130  Size == 0)
3131  return SDValue();
3132 
3133  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3134  MachineFunction &MF = DAG.getMachineFunction();
3135  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3136 
3137  // We can't bloat the constant pool too much, else the ConstantIslands pass
3138  // may fail to converge. If we haven't promoted this global yet (it may have
3139  // multiple uses), and promoting it would increase the constant pool size (Sz
3140  // > 4), ensure we have space to do so up to MaxTotal.
3141  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3142  if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3143  ConstpoolPromotionMaxTotal)
3144  return SDValue();
3145 
3146  // This is only valid if all users are in a single function; we can't clone
3147  // the constant in general. The LLVM IR unnamed_addr allows merging
3148  // constants, but not cloning them.
3149  //
3150  // We could potentially allow cloning if we could prove all uses of the
3151  // constant in the current function don't care about the address, like
3152  // printf format strings. But that isn't implemented for now.
3153  if (!allUsersAreInFunction(GVar, &F))
3154  return SDValue();
3155 
3156  // We're going to inline this global. Pad it out if needed.
3157  if (RequiredPadding != 4) {
3158  StringRef S = CDAInit->getAsString();
3159 
3160  SmallVector<uint8_t,16> V(S.size());
3161  std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3162  while (RequiredPadding--)
3163  V.push_back(0);
3164  Init = ConstantDataArray::get(*DAG.getContext(), V);
3165  }
3166 
3167  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3168  SDValue CPAddr =
3169  DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3170  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3171  AFI->markGlobalAsPromotedToConstantPool(GVar);
3172  AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3173  PaddedSize - 4);
3174  }
3175  ++NumConstpoolPromoted;
3176  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3177 }
3178 
3179 static bool isReadOnly(const GlobalValue *GV) {
3180  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3181  if (!(GV = GA->getBaseObject()))
3182  return false;
3183  if (const auto *V = dyn_cast<GlobalVariable>(GV))
3184  return V->isConstant();
3185  return isa<Function>(GV);
3186 }
3187 
3188 SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3189  SelectionDAG &DAG) const {
3190  switch (Subtarget->getTargetTriple().getObjectFormat()) {
3191  default: llvm_unreachable("unknown object format");
3192  case Triple::COFF:
3193  return LowerGlobalAddressWindows(Op, DAG);
3194  case Triple::ELF:
3195  return LowerGlobalAddressELF(Op, DAG);
3196  case Triple::MachO:
3197  return LowerGlobalAddressDarwin(Op, DAG);
3198  }
3199 }
3200 
3201 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3202  SelectionDAG &DAG) const {
3203  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3204  SDLoc dl(Op);
3205  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3206  const TargetMachine &TM = getTargetMachine();
3207  bool IsRO = isReadOnly(GV);
3208 
3209  // promoteToConstantPool only if not generating XO text section
3210  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3211  if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3212  return V;
3213 
3214  if (isPositionIndependent()) {
3215  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3216  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3217  UseGOT_PREL ? ARMII::MO_GOT : 0);
3218  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3219  if (UseGOT_PREL)
3220  Result =
3221  DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3222  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3223  return Result;
3224  } else if (Subtarget->isROPI() && IsRO) {
3225  // PC-relative.
3226  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3227  SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3228  return Result;
3229  } else if (Subtarget->isRWPI() && !IsRO) {
3230  // SB-relative.
3231  SDValue RelAddr;
3232  if (Subtarget->useMovt()) {
3233  ++NumMovwMovt;
3234  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3235  RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3236  } else { // use literal pool for address constant
3237  ARMConstantPoolValue *CPV =
3238  ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3239  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3240  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3241  RelAddr = DAG.getLoad(
3242  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3243  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3244  }
3245  SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3246  SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3247  return Result;
3248  }
3249 
3250  // If we have T2 ops, we can materialize the address directly via movt/movw
3251  // pair. This is always cheaper.
3252  if (Subtarget->useMovt()) {
3253  ++NumMovwMovt;
3254  // FIXME: Once remat is capable of dealing with instructions with register
3255  // operands, expand this into two nodes.
3256  return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3257  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3258  } else {
3259  SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3260  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3261  return DAG.getLoad(
3262  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3263  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3264  }
3265 }
3266 
3267 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3268  SelectionDAG &DAG) const {
3269  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3270  "ROPI/RWPI not currently supported for Darwin");
3271  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3272  SDLoc dl(Op);
3273  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3274 
3275  if (Subtarget->useMovt())
3276  ++NumMovwMovt;
3277 
3278  // FIXME: Once remat is capable of dealing with instructions with register
3279  // operands, expand this into multiple nodes
3280  unsigned Wrapper =
3281  Subtarget->useMovt() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3282 
3283  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3284  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3285 
3286  if (Subtarget->isGVIndirectSymbol(GV))
3287  Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3288  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3289  return Result;
3290 }
3291 
3292 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3293  SelectionDAG &DAG) const {
3294  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3295  assert(Subtarget->useMovt() &&
3296  "Windows on ARM expects to use movw/movt");
3297  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3298  "ROPI/RWPI not currently supported for Windows");
3299 
3300  const TargetMachine &TM = getTargetMachine();
3301  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3302  ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
3303  if (GV->hasDLLImportStorageClass())
3304  TargetFlags = ARMII::MO_DLLIMPORT;
3305  else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
3306  TargetFlags = ARMII::MO_COFFSTUB;
3307  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3308  SDValue Result;
3309  SDLoc DL(Op);
3310 
3311  ++NumMovwMovt;
3312 
3313  // FIXME: Once remat is capable of dealing with instructions with register
3314  // operands, expand this into two nodes.
3315  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3316  DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
3317  TargetFlags));
3318  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
3319  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3320  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3321  return Result;
3322 }
3323 
3324 SDValue
3325 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3326  SDLoc dl(Op);
3327  SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3328  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3329  DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3330  Op.getOperand(1), Val);
3331 }
3332 
3333 SDValue
3334 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3335  SDLoc dl(Op);
3336  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3337  Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3338 }
3339 
3340 SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3341  SelectionDAG &DAG) const {
3342  SDLoc dl(Op);
3343  return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3344  Op.getOperand(0));
3345 }
3346 
3347 SDValue
3348 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3349  const ARMSubtarget *Subtarget) const {
3350  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3351  SDLoc dl(Op);
3352  switch (IntNo) {
3353  default: return SDValue(); // Don't custom lower most intrinsics.
3354  case Intrinsic::thread_pointer: {
3355  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3356  return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3357  }
3358  case Intrinsic::eh_sjlj_lsda: {
3359  MachineFunction &MF = DAG.getMachineFunction();
3360  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3361  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3362  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3363  SDValue CPAddr;
3364  bool IsPositionIndependent = isPositionIndependent();
3365  unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
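  // Note: the adjustment accounts for the implicit PC-read bias: reading PC
  // yields the current instruction address plus 8 in ARM mode and plus 4 in
  // Thumb mode, so the constant-pool entry is biased accordingly for the
  // PIC_ADD emitted below.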
3366  ARMConstantPoolValue *CPV =
3367  ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
3368  ARMCP::CPLSDA, PCAdj);
3369  CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3370  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3371  SDValue Result = DAG.getLoad(
3372  PtrVT, dl, DAG.getEntryNode(), CPAddr,
3373  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3374 
3375  if (IsPositionIndependent) {
3376  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3377  Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3378  }
3379  return Result;
3380  }
3381  case Intrinsic::arm_neon_vabs:
3382  return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3383  Op.getOperand(1));
3384  case Intrinsic::arm_neon_vmulls:
3385  case Intrinsic::arm_neon_vmullu: {
3386  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3387  ? ARMISD::VMULLs : ARMISD::VMULLu;
3388  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3389  Op.getOperand(1), Op.getOperand(2));
3390  }
3391  case Intrinsic::arm_neon_vminnm:
3392  case Intrinsic::arm_neon_vmaxnm: {
3393  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3394  ? ISD::FMINNUM : ISD::FMAXNUM;
3395  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3396  Op.getOperand(1), Op.getOperand(2));
3397  }
3398  case Intrinsic::arm_neon_vminu:
3399  case Intrinsic::arm_neon_vmaxu: {
3400  if (Op.getValueType().isFloatingPoint())
3401  return SDValue();
3402  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3403  ? ISD::UMIN : ISD::UMAX;
3404  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3405  Op.getOperand(1), Op.getOperand(2));
3406  }
3407  case Intrinsic::arm_neon_vmins:
3408  case Intrinsic::arm_neon_vmaxs: {
3409  // v{min,max}s is overloaded between signed integers and floats.
3410  if (!Op.getValueType().isFloatingPoint()) {
3411  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3412  ? ISD::SMIN : ISD::SMAX;
3413  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3414  Op.getOperand(1), Op.getOperand(2));
3415  }
3416  unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3417  ? ISD::FMINIMUM : ISD::FMAXIMUM;
3418  return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3419  Op.getOperand(1), Op.getOperand(2));
3420  }
3421  case Intrinsic::arm_neon_vtbl1:
3422  return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3423  Op.getOperand(1), Op.getOperand(2));
3424  case Intrinsic::arm_neon_vtbl2:
3425  return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3426  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3427  }
3428 }
3429 
3430 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3431  const ARMSubtarget *Subtarget) {
3432  SDLoc dl(Op);
3433  ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
3434  auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
3435  if (SSID == SyncScope::SingleThread)
3436  return Op;
3437 
3438  if (!Subtarget->hasDataBarrier()) {
3439  // Some ARMv6 cpus can support data barriers with an mcr instruction.
3440  // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3441  // here.
3442  assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3443  "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3444  return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3445  DAG.getConstant(0, dl, MVT::i32));
3446  }
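  // Note: on subtargets with data barriers the fence is lowered to a DMB
  // below; e.g. a seq_cst fence typically becomes "dmb ish" on A/R-class
  // cores and "dmb sy" on M-class cores, matching the Domain selection that
  // follows.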
3447 
3448  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3449  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3450  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3451  if (Subtarget->isMClass()) {
3452  // Only a full system barrier exists in the M-class architectures.
3453  Domain = ARM_MB::SY;
3454  } else if (Subtarget->preferISHSTBarriers() &&
3455  Ord == AtomicOrdering::Release) {
3456  // Swift happens to implement ISHST barriers in a way that's compatible with
3457  // Release semantics but weaker than ISH so we'd be fools not to use
3458  // it. Beware: other processors probably don't!
3459  Domain = ARM_MB::ISHST;
3460  }
3461 
3462  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3463  DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3464  DAG.getConstant(Domain, dl, MVT::i32));
3465 }
3466 
3467 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3468  const ARMSubtarget *Subtarget) {
3469  // Pre-v5TE ARM and Thumb1 do not have preload instructions.
3470  if (!(Subtarget->isThumb2() ||
3471  (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3472  // Just preserve the chain.
3473  return Op.getOperand(0);
3474 
3475  SDLoc dl(Op);
3476  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
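  // Note: operand 2 is the rw argument of the prefetch (0 = read, 1 = write),
  // so flipping its low bit yields an "is read" flag.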
3477  if (!isRead &&
3478  (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3479  // ARMv7 with MP extension has PLDW.
3480  return Op.getOperand(0);
3481 
3482  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3483  if (Subtarget->isThumb()) {
3484  // Invert the bits.
3485  isRead = ~isRead & 1;
3486  isData = ~isData & 1;
3487  }
3488 
3489  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3490  Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3491  DAG.getConstant(isData, dl, MVT::i32));
3492 }
3493 
3494 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3495  MachineFunction &MF = DAG.getMachineFunction();
3496  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3497 
3498  // vastart just stores the address of the VarArgsFrameIndex slot into the
3499  // memory location argument.
3500  SDLoc dl(Op);
3501  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3502  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3503  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3504  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3505  MachinePointerInfo(SV));
3506 }
3507 
3508 SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3509  CCValAssign &NextVA,
3510  SDValue &Root,
3511  SelectionDAG &DAG,
3512  const SDLoc &dl) const {
3513  MachineFunction &MF = DAG.getMachineFunction();
3514  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3515 
3516  const TargetRegisterClass *RC;
3517  if (AFI->isThumb1OnlyFunction())
3518  RC = &ARM::tGPRRegClass;
3519  else
3520  RC = &ARM::GPRRegClass;
3521 
3522  // Transform the arguments stored in physical registers into virtual ones.
3523  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3524  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3525 
3526  SDValue ArgValue2;
3527  if (NextVA.isMemLoc()) {
3528  MachineFrameInfo &MFI = MF.getFrameInfo();
3529  int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3530 
3531  // Create load node to retrieve arguments from the stack.
3532  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3533  ArgValue2 = DAG.getLoad(
3534  MVT::i32, dl, Root, FIN,
3535  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3536  } else {
3537  Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3538  ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3539  }
3540  if (!Subtarget->isLittle())
3541  std::swap (ArgValue, ArgValue2);
3542  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3543 }
3544 
3545 // The remaining GPRs hold either the beginning of variable-argument
3546 // data, or the beginning of an aggregate passed by value (usually
3547 // byval). Either way, we allocate stack slots adjacent to the data
3548 // provided by our caller, and store the unallocated registers there.
3549 // If this is a variadic function, the va_list pointer will begin with
3550 // these values; otherwise, this reassembles a (byval) structure that
3551 // was split between registers and memory.
3552 // Return: The frame index that the registers were stored into.
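// For example (roughly): a byval aggregate split as r2-r3 plus stack gives
// RBegin = r2 and REnd = r4; ArgOffset becomes -8, a fixed object is created
// immediately below the caller-provided stack portion, and r2/r3 are stored
// at offsets 0 and 4 of that object so the aggregate is contiguous again.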
3553 int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3554  const SDLoc &dl, SDValue &Chain,
3555  const Value *OrigArg,
3556  unsigned InRegsParamRecordIdx,
3557  int ArgOffset, unsigned ArgSize) const {
3558  // Currently, two use-cases are possible:
3559  // Case #1. Non-var-args function, and we meet the first byval parameter.
3560  // Set up the first unallocated register as the first byval register;
3561  // eat all remaining registers
3562  // (these two actions are performed by the HandleByVal method).
3563  // Then, here, we initialize the stack frame with
3564  // "store-reg" instructions.
3565  // Case #2. Var-args function that doesn't contain byval parameters.
3566  // The same: eat all remaining unallocated registers and
3567  // initialize the stack frame.
3568 
3569  MachineFunction &MF = DAG.getMachineFunction();
3570  MachineFrameInfo &MFI = MF.getFrameInfo();
3571  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3572  unsigned RBegin, REnd;
3573  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3574  CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3575  } else {
3576  unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3577  RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3578  REnd = ARM::R4;
3579  }
3580 
3581  if (REnd != RBegin)
3582  ArgOffset = -4 * (ARM::R4 - RBegin);
3583 
3584  auto PtrVT = getPointerTy(DAG.getDataLayout());
3585  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3586  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3587 
3588  SmallVector<SDValue, 4> MemOps;
3589  const TargetRegisterClass *RC =
3590  AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3591 
3592  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3593  unsigned VReg = MF.addLiveIn(Reg, RC);
3594  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3595  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3596  MachinePointerInfo(OrigArg, 4 * i));
3597  MemOps.push_back(Store);
3598  FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3599  }
3600 
3601  if (!MemOps.empty())
3602  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3603  return FrameIndex;
3604 }
3605 
3606 // Set up the stack frame that the va_list pointer will start from.
3607 void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3608  const SDLoc &dl, SDValue &Chain,
3609  unsigned ArgOffset,
3610  unsigned TotalArgRegsSaveSize,
3611  bool ForceMutable) const {
3612  MachineFunction &MF = DAG.getMachineFunction();
3613  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3614 
3615  // Try to store any remaining integer argument regs
3616  // to their spots on the stack so that they may be loaded by dereferencing
3617  // the result of va_next.
3618  // If there are no regs to be stored, just point the address after the last
3619  // argument passed via the stack.
3620  int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3621  CCInfo.getInRegsParamsCount(),
3622  CCInfo.getNextStackOffset(), 4);
3623  AFI->setVarArgsFrameIndex(FrameIndex);
3624 }
3625 
3626 SDValue ARMTargetLowering::LowerFormalArguments(
3627  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3628  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3629  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3630  MachineFunction &MF = DAG.getMachineFunction();
3631  MachineFrameInfo &MFI = MF.getFrameInfo();
3632 
3633  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3634 
3635  // Assign locations to all of the incoming arguments.
3636  SmallVector<CCValAssign, 16> ArgLocs;
3637  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3638  *DAG.getContext());
3639  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3640 
3641  SmallVector<SDValue, 16> ArgValues;
3642  SDValue ArgValue;
3643  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3644  unsigned CurArgIdx = 0;
3645 
3646  // Initially ArgRegsSaveSize is zero.
3647  // Then we increase this value each time we meet a byval parameter.
3648  // We also increase this value in the case of a varargs function.
3649  AFI->setArgRegsSaveSize(0);
3650 
3651  // Calculate the amount of stack space that we need to allocate to store
3652  // byval and variadic arguments that are passed in registers.
3653  // We need to know this before we allocate the first byval or variadic
3654  // argument, as they will be allocated a stack slot below the CFA (Canonical
3655  // Frame Address, the stack pointer at entry to the function).
3656  unsigned ArgRegBegin = ARM::R4;
3657  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3658  if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3659  break;
3660 
3661  CCValAssign &VA = ArgLocs[i];
3662  unsigned Index = VA.getValNo();
3663  ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3664  if (!Flags.isByVal())
3665  continue;
3666 
3667  assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3668  unsigned RBegin, REnd;
3669  CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3670  ArgRegBegin = std::min(ArgRegBegin, RBegin);
3671 
3672  CCInfo.nextInRegsParam();
3673  }
3674  CCInfo.rewindByValRegsInfo();
3675 
3676  int lastInsIndex = -1;
3677  if (isVarArg && MFI.hasVAStart()) {
3678  unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3679  if (RegIdx != array_lengthof(GPRArgRegs))
3680  ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3681  }
3682 
3683  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3684  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3685  auto PtrVT = getPointerTy(DAG.getDataLayout());
3686 
3687  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3688  CCValAssign &VA = ArgLocs[i];
3689  if (Ins[VA.getValNo()].isOrigArg()) {
3690  std::advance(CurOrigArg,
3691  Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3692  CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3693  }
3694  // Arguments stored in registers.
3695  if (VA.isRegLoc()) {
3696  EVT RegVT = VA.getLocVT();
3697 
3698  if (VA.needsCustom()) {
3699  // f64 and vector types are split up into multiple registers or
3700  // combinations of registers and stack slots.
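  // E.g. when an f64 is passed in a GPR pair (or partly on the stack), a
  // v2f64 argument is rebuilt below from two such f64 halves.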
3701  if (VA.getLocVT() == MVT::v2f64) {
3702  SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3703  Chain, DAG, dl);
3704  VA = ArgLocs[++i]; // skip ahead to next loc
3705  SDValue ArgValue2;
3706  if (VA.isMemLoc()) {
3707  int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3708  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3709  ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3710  MachinePointerInfo::getFixedStack(
3711  DAG.getMachineFunction(), FI));
3712  } else {
3713  ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3714  Chain, DAG, dl);
3715  }
3716  ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3717  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3718  ArgValue, ArgValue1,
3719  DAG.getIntPtrConstant(0, dl));
3720  ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3721  ArgValue, ArgValue2,
3722  DAG.getIntPtrConstant(1, dl));
3723  } else
3724  ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3725  } else {
3726  const TargetRegisterClass *RC;
3727 
3728 
3729  if (RegVT == MVT::f16)
3730  RC = &ARM::HPRRegClass;
3731  else if (RegVT == MVT::f32)
3732  RC = &ARM::SPRRegClass;
3733  else if (RegVT == MVT::f64 || RegVT == MVT::v4f16)
3734  RC = &ARM::DPRRegClass;
3735  else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16)
3736  RC = &ARM::QPRRegClass;
3737  else if (RegVT == MVT::i32)
3738  RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3739  : &ARM::GPRRegClass;
3740  else
3741  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3742 
3743  // Transform the arguments in physical registers into virtual ones.
3744  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3745  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3746  }
3747 
3748  // If this is an 8 or 16-bit value, it is really passed promoted
3749  // to 32 bits. Insert an assert[sz]ext to capture this, then
3750  // truncate to the right size.
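  // E.g. a signed i8 argument arrives promoted in a 32-bit register; an
  // AssertSext to i8 is attached and the value is then truncated back to i8.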
3751  switch (VA.getLocInfo()) {
3752  default: llvm_unreachable("Unknown loc info!");
3753  case CCValAssign::Full: break;
3754  case CCValAssign::BCvt:
3755  ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3756  break;
3757  case CCValAssign::SExt:
3758  ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3759  DAG.getValueType(VA.getValVT()));
3760  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3761  break;
3762  case CCValAssign::ZExt:
3763  ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3764  DAG.getValueType(VA.getValVT()));
3765  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3766  break;
3767  }
3768 
3769  InVals.push_back(ArgValue);
3770  } else { // VA.isRegLoc()
3771  // sanity check
3772  assert(VA.isMemLoc());
3773  assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3774 
3775  int index = VA.getValNo();
3776 
3777  // Some Ins[] entries become multiple ArgLoc[] entries.
3778  // Process them only once.
3779  if (index != lastInsIndex)
3780  {
3781  ISD::ArgFlagsTy Flags = Ins[index].Flags;
3782  // FIXME: For now, all byval parameter objects are marked mutable.
3783  // This can be changed with more analysis.
3784  // In case of tail call optimization, mark all arguments mutable,
3785  // since they could be overwritten by the lowering of arguments in case of
3786  // a tail call.
3787  if (Flags.isByVal()) {
3788  assert(Ins[index].isOrigArg() &&
3789  "Byval arguments cannot be implicit");
3790  unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3791 
3792  int FrameIndex = StoreByValRegs(
3793  CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
3794  VA.getLocMemOffset(), Flags.getByValSize());
3795  InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3796  CCInfo.nextInRegsParam();
3797  } else {
3798  unsigned FIOffset = VA.getLocMemOffset();
3799  int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3800  FIOffset, true);
3801 
3802  // Create load nodes to retrieve arguments from the stack.
3803  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3804  InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3805  MachinePointerInfo::getFixedStack(
3806  DAG.getMachineFunction(), FI)));
3807  }
3808  lastInsIndex = index;
3809  }
3810  }
3811  }
3812 
3813  // varargs
3814  if (isVarArg && MFI.hasVAStart())
3815  VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3816  CCInfo.getNextStackOffset(),
3817  TotalArgRegsSaveSize);
3818 
3819  AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3820 
3821  return Chain;
3822 }
3823 
3824 /// isFloatingPointZero - Return true if this is +0.0.
3825 static bool isFloatingPointZero(SDValue Op) {
3826  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3827  return CFP->getValueAPF().isPosZero();
3828  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3829  // Maybe this has already been legalized into the constant pool?
3830  if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3831  SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3832  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3833  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3834  return CFP->getValueAPF().isPosZero();
3835  }
3836  } else if (Op->getOpcode() == ISD::BITCAST &&
3837  Op->getValueType(0) == MVT::f64) {
3838  // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3839  // created by LowerConstantFP().
3840  SDValue BitcastOp = Op->getOperand(0);
3841  if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
3842  isNullConstant(BitcastOp->getOperand(0)))
3843  return true;
3844  }
3845  return false;
3846 }
3847 
3848 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for
3849 /// the given operands.
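/// For instance, in ARM mode "x < 257" cannot encode 257 as a modified
/// immediate, so it is rewritten below as "x <= 256", which can be encoded.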
3850 SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3851  SDValue &ARMcc, SelectionDAG &DAG,
3852  const SDLoc &dl) const {
3853  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3854  unsigned C = RHSC->getZExtValue();
3855  if (!isLegalICmpImmediate((int32_t)C)) {
3856  // Constant does not fit, try adjusting it by one.
3857  switch (CC) {
3858  default: break;
3859  case ISD::SETLT:
3860  case ISD::SETGE:
3861  if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3862  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3863  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3864  }
3865  break;
3866  case ISD::SETULT:
3867  case ISD::SETUGE:
3868  if (C != 0 && isLegalICmpImmediate(C-1)) {
3869  CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3870  RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3871  }
3872  break;
3873  case ISD::SETLE:
3874  case ISD::SETGT:
3875  if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3876  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3877  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3878  }
3879  break;
3880  case ISD::SETULE:
3881  case ISD::SETUGT:
3882  if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3883  CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3884  RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3885  }
3886  break;
3887  }
3888  }
3889  } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
3890  (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
3891  // In ARM and Thumb-2, the compare instructions can shift their second
3892  // operand.
3893  CC = ISD::getSetCCSwappedOperands(CC);
3894  std::swap(LHS, RHS);
3895  }
3896 
3897  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3898  ARMISD::NodeType CompareType;
3899  switch (CondCode) {
3900  default:
3901  CompareType = ARMISD::CMP;
3902  break;
3903  case ARMCC::EQ:
3904  case ARMCC::NE:
3905  // Uses only Z Flag
3906  CompareType = ARMISD::CMPZ;
3907  break;
3908  }
3909  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3910  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3911 }
3912 
3913 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3914 SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
3915  SelectionDAG &DAG, const SDLoc &dl,
3916  bool InvalidOnQNaN) const {
3917  assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
3918  SDValue Cmp;
3919  SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
3920  if (!isFloatingPointZero(RHS))
3921  Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
3922  else
3923  Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
3924  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3925 }
3926 
3927 /// duplicateCmp - Glue values can have only one use, so this function
3928 /// duplicates a comparison node.
3929 SDValue
3930 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3931  unsigned Opc = Cmp.getOpcode();
3932  SDLoc DL(Cmp);
3933  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3934  return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3935 
3936  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3937  Cmp = Cmp.getOperand(0);
3938  Opc = Cmp.getOpcode();
3939  if (Opc == ARMISD::CMPFP)
3940  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3941  Cmp.getOperand(1), Cmp.getOperand(2));
3942  else {
3943  assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3944  Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3945  Cmp.getOperand(1));
3946  }
3947  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3948 }
3949 
3950 // This function returns three things: the arithmetic computation itself
3951 // (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
3952 // comparison and the condition code define the case in which the arithmetic
3953 // computation *does not* overflow.
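// For example, for ISD::SADDO this produces Value = a + b and
// OverflowCmp = CMP(Value, a) with condition VC: the subtraction
// (a + b) - a overflows exactly when the original addition did, so VC
// (no overflow) selects the non-overflowing case.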
3954 std::pair<SDValue, SDValue>
3955 ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3956  SDValue &ARMcc) const {
3957  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
3958 
3959  SDValue Value, OverflowCmp;
3960  SDValue LHS = Op.getOperand(0);
3961  SDValue RHS = Op.getOperand(1);
3962  SDLoc dl(Op);
3963 
3964  // FIXME: We are currently always generating CMPs because we don't support
3965  // generating CMN through the backend. This is not as good as the natural
3966  // CMP case because it causes a register dependency and cannot be folded
3967  // later.
3968 
3969  switch (Op.getOpcode()) {
3970  default:
3971  llvm_unreachable("Unknown overflow instruction!");
3972  case ISD::SADDO:
3973  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3974  Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3975  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3976  break;
3977  case ISD::UADDO:
3978  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3979  // We use ADDC here to correspond to its use in LowerUnsignedALUO.
3980  // We do not use it in the USUBO case as Value may not be used.
3981  Value = DAG.getNode(ARMISD::ADDC, dl,
3982  DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
3983  .getValue(0);
3984  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3985  break;
3986  case ISD::SSUBO:
3987  ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3988  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3989  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3990  break;
3991  case ISD::USUBO:
3992  ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3993  Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3994  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3995  break;
3996  case ISD::UMULO:
3997  // We generate a UMUL_LOHI and then check if the high word is 0.
3998  ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
3999  Value = DAG.getNode(ISD::UMUL_LOHI, dl,
4000  DAG.getVTList(Op.getValueType(), Op.getValueType()),
4001  LHS, RHS);
4002  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4003  DAG.getConstant(0, dl, MVT::i32));
4004  Value = Value.getValue(0); // We only want the low 32 bits for the result.
4005  break;
4006  case ISD::SMULO:
4007  // We generate a SMUL_LOHI and then check if all the bits of the high word
4008  // are the same as the sign bit of the low word.
4009  ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4010  Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4011  DAG.getVTList(Op.getValueType(), Op.getValueType()),
4012  LHS, RHS);
4013  OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4014  DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4015  Value.getValue(0),
4016  DAG.getConstant(31, dl, MVT::i32)));
4017  Value = Value.getValue(0); // We only want the low 32 bits for the result.
4018  break;
4019  } // switch (...)
4020 
4021  return std::make_pair(Value, OverflowCmp);
4022 }
4023 
4024 SDValue
4025 ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4026  // Let legalize expand this if it isn't a legal type yet.
4027  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4028  return SDValue();
4029 
4030  SDValue Value, OverflowCmp;
4031  SDValue ARMcc;
4032  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
4033  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4034  SDLoc dl(Op);
4035  // We use 0 and 1 as false and true values.
4036  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
4037  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
4038  EVT VT = Op.getValueType();
4039 
4040  SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
4041  ARMcc, CCR, OverflowCmp);
4042 
4043  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
4044  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4045 }
4046 
4047 static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
4048  SelectionDAG &DAG) {
4049  SDLoc DL(BoolCarry);
4050  EVT CarryVT = BoolCarry.getValueType();
4051 
4052  // This converts the boolean carry value into the carry flag by doing
4053  // ARMISD::SUBC Carry, 1
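  // A boolean carry of 1 gives 1 - 1 = 0 with no borrow, which on ARM sets
  // the carry flag; 0 - 1 borrows and clears it, so the 0/1 value maps
  // directly onto ARM's inverted-borrow carry convention.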
4054  SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
4055  DAG.getVTList(CarryVT, MVT::i32),
4056  BoolCarry, DAG.getConstant(1, DL, CarryVT));
4057  return Carry.getValue(1);
4058 }
4059 
4060 static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
4061  SelectionDAG &DAG) {
4062  SDLoc DL(Flags);
4063 
4064  // Now convert the carry flag into a boolean carry. We do this
4065  // using ARMISD::ADDE 0, 0, Carry: 0 + 0 + carry-in yields the flag as a 0/1 value.
4066  return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
4067  DAG.getConstant(0, DL, MVT::i32),
4068  DAG.getConstant(0, DL, MVT::i32), Flags);
4069 }
4070 
4071 SDValue ARMTargetLowering::LowerUnsi