1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that ARM uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMISelLowering.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
20#include "ARMPerfectShuffle.h"
21#include "ARMRegisterInfo.h"
22#include "ARMSelectionDAGInfo.h"
23#include "ARMSubtarget.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/Statistic.h"
38#include "llvm/ADT/StringRef.h"
40#include "llvm/ADT/Twine.h"
67#include "llvm/IR/Attributes.h"
68#include "llvm/IR/CallingConv.h"
69#include "llvm/IR/Constant.h"
70#include "llvm/IR/Constants.h"
71#include "llvm/IR/DataLayout.h"
72#include "llvm/IR/DebugLoc.h"
74#include "llvm/IR/Function.h"
75#include "llvm/IR/GlobalAlias.h"
76#include "llvm/IR/GlobalValue.h"
78#include "llvm/IR/IRBuilder.h"
79#include "llvm/IR/InlineAsm.h"
80#include "llvm/IR/Instruction.h"
83#include "llvm/IR/Intrinsics.h"
84#include "llvm/IR/IntrinsicsARM.h"
85#include "llvm/IR/Module.h"
86#include "llvm/IR/Type.h"
87#include "llvm/IR/User.h"
88#include "llvm/IR/Value.h"
89#include "llvm/MC/MCInstrDesc.h"
91#include "llvm/MC/MCSchedule.h"
98#include "llvm/Support/Debug.h"
106#include <algorithm>
107#include <cassert>
108#include <cstdint>
109#include <cstdlib>
110#include <iterator>
111#include <limits>
112#include <optional>
113#include <tuple>
114#include <utility>
115#include <vector>
116
117using namespace llvm;
118using namespace llvm::PatternMatch;
119
120#define DEBUG_TYPE "arm-isel"
121
122STATISTIC(NumTailCalls, "Number of tail calls");
123STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
124STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
125STATISTIC(NumConstpoolPromoted,
126 "Number of constants with their storage promoted into constant pools");
127
128static cl::opt<bool>
129ARMInterworking("arm-interworking", cl::Hidden,
130 cl::desc("Enable / disable ARM interworking (for debugging only)"),
131 cl::init(true));
132
134 "arm-promote-constant", cl::Hidden,
135 cl::desc("Enable / disable promotion of unnamed_addr constants into "
136 "constant pools"),
137 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
139 "arm-promote-constant-max-size", cl::Hidden,
140 cl::desc("Maximum size of constant to promote into a constant pool"),
141 cl::init(64));
143 "arm-promote-constant-max-total", cl::Hidden,
144 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
145 cl::init(128));
146
148MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
149 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
150 cl::init(2));
151
152/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
153constexpr MVT FlagsVT = MVT::i32;
154
155// The APCS parameter registers.
156static const MCPhysReg GPRArgRegs[] = {
157 ARM::R0, ARM::R1, ARM::R2, ARM::R3
158};
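// For illustration (an APCS/AAPCS rule of thumb, not the full story encoded in
// the CC_* tables used below): the first four integer-sized arguments travel in
// r0-r3 and the remainder spill to the stack, so for
//   int f(int a, int b, int c, int d, int e); // a..d in r0..r3, e on the stack
// only 'e' is passed in memory.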
159
161 SelectionDAG &DAG, const SDLoc &DL) {
163 assert(Arg.ArgVT.bitsLT(MVT::i32));
164 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, Arg.ArgVT, Value);
165 SDValue Ext =
167 MVT::i32, Trunc);
168 return Ext;
169}
170
171void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
172 if (VT != PromotedLdStVT) {
174 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
175
177 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
178 }
179
180 MVT ElemTy = VT.getVectorElementType();
181 if (ElemTy != MVT::f64)
185 if (ElemTy == MVT::i32) {
190 } else {
195 }
204 if (VT.isInteger()) {
208 }
209
210 // Neon does not support vector divide/remainder operations.
219
220 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
221 for (auto Opcode : {ISD::ABS, ISD::ABDS, ISD::ABDU, ISD::SMIN, ISD::SMAX,
223 setOperationAction(Opcode, VT, Legal);
224 if (!VT.isFloatingPoint())
225 for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
226 setOperationAction(Opcode, VT, Legal);
227}
228
229void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
230 addRegisterClass(VT, &ARM::DPRRegClass);
231 addTypeForNEON(VT, MVT::f64);
232}
233
234void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
235 addRegisterClass(VT, &ARM::DPairRegClass);
236 addTypeForNEON(VT, MVT::v2f64);
237}
238
239void ARMTargetLowering::setAllExpand(MVT VT) {
240 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
241 setOperationAction(Opc, VT, Expand);
242
243 // We support these really simple operations even on types where all
244 // the actual arithmetic has to be broken down into simpler
245 // operations or turned into library calls.
250}
251
252void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
253 LegalizeAction Action) {
254 setLoadExtAction(ISD::EXTLOAD, From, To, Action);
255 setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
256 setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
257}
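// For example, the single call
//   addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
// marks the any-, zero- and sign-extending loads from v4i8 to v4i32 as legal in
// one go, exactly as the MVE extending-load setup below uses it.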
258
259void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
260 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
261
262 for (auto VT : IntTypes) {
263 addRegisterClass(VT, &ARM::MQPRRegClass);
293
294 // No native support for these.
304
305 // Vector reductions
315
316 if (!HasMVEFP) {
321 } else {
324 }
325
326 // Pre- and post-increment are supported on loads and stores
327 for (unsigned im = (unsigned)ISD::PRE_INC;
333 }
334 }
335
336 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
337 for (auto VT : FloatTypes) {
338 addRegisterClass(VT, &ARM::MQPRRegClass);
339 if (!HasMVEFP)
340 setAllExpand(VT);
341
342 // These are legal or custom whether or not we have MVE.fp
355
356 // Pre- and post-increment are supported on loads and stores
357 for (unsigned im = (unsigned)ISD::PRE_INC;
363 }
364
365 if (HasMVEFP) {
373
374 // No native support for these.
389 }
390 }
391
392 // Custom-expand vector reductions that are smaller than legal to prevent
393 // false zero items from being added.
402
403 // We 'support' these types up to the bitcast/load/store level, regardless of
404 // MVE integer-only / float support. Only FP data processing on the FP vector
405 // types is inhibited when the target is integer-only.
406 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
407 for (auto VT : LongTypes) {
408 addRegisterClass(VT, &ARM::MQPRRegClass);
409 setAllExpand(VT);
415 }
417
418 // We can do bitwise operations on v2i64 vectors
419 setOperationAction(ISD::AND, MVT::v2i64, Legal);
420 setOperationAction(ISD::OR, MVT::v2i64, Legal);
421 setOperationAction(ISD::XOR, MVT::v2i64, Legal);
422
423 // It is legal to extload from v4i8 to v4i16 or v4i32.
424 addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
425 addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
426 addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
427
428 // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
434
435 // Some truncating stores are legal too.
436 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
437 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
438 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
439
440 // Pre- and post-increment on these are legal, given the correct extends
441 for (unsigned im = (unsigned)ISD::PRE_INC;
443 for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
448 }
449 }
450
451 // Predicate types
452 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
453 for (auto VT : pTypes) {
454 addRegisterClass(VT, &ARM::VCCRRegClass);
469
470 if (!HasMVEFP) {
475 }
476 }
480 setOperationAction(ISD::OR, MVT::v2i1, Expand);
486
495}
496
498 const ARMSubtarget &STI)
499 : TargetLowering(TM), Subtarget(&STI) {
500 RegInfo = Subtarget->getRegisterInfo();
501 Itins = Subtarget->getInstrItineraryData();
502
505
506 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
507 !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
508 bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
509 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
510 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
511 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
513 }
514
515 if (Subtarget->isTargetMachO()) {
516 // Uses VFP for Thumb libfuncs if available.
517 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
518 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
519 static const struct {
520 const RTLIB::Libcall Op;
521 const char * const Name;
522 const ISD::CondCode Cond;
523 } LibraryCalls[] = {
524 // Single-precision floating-point arithmetic.
525 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
526 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
527 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
528 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
529
530 // Double-precision floating-point arithmetic.
531 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
532 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
533 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
534 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
535
536 // Single-precision comparisons.
537 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
538 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
539 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
540 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
541 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
542 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
543 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
544
545 // Double-precision comparisons.
546 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
547 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
548 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
549 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
550 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
551 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
552 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
553
554 // Floating-point to integer conversions.
555 // i64 conversions are done via library routines even when generating VFP
556 // instructions, so use the same ones.
557 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
558 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
559 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
560 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
561
562 // Conversions between floating types.
563 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
564 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
565
566 // Integer to floating-point conversions.
567 // i64 conversions are done via library routines even when generating VFP
568 // instructions, so use the same ones.
569 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
570 // e.g., __floatunsidf vs. __floatunssidfvfp.
571 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
572 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
573 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
574 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
575 };
576
577 for (const auto &LC : LibraryCalls) {
578 setLibcallName(LC.Op, LC.Name);
579 if (LC.Cond != ISD::SETCC_INVALID)
580 setCmpLibcallCC(LC.Op, LC.Cond);
581 }
582 }
583 }
584
585 // RTLIB
586 if (Subtarget->isAAPCS_ABI() &&
587 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
588 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
589 static const struct {
590 const RTLIB::Libcall Op;
591 const char * const Name;
592 const CallingConv::ID CC;
593 const ISD::CondCode Cond;
594 } LibraryCalls[] = {
595 // Double-precision floating-point arithmetic helper functions
596 // RTABI chapter 4.1.2, Table 2
597 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
598 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
599 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
600 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
601
602 // Double-precision floating-point comparison helper functions
603 // RTABI chapter 4.1.2, Table 3
604 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
605 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
606 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
607 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
608 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
609 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
610 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
611
612 // Single-precision floating-point arithmetic helper functions
613 // RTABI chapter 4.1.2, Table 4
614 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
615 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
616 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
617 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
618
619 // Single-precision floating-point comparison helper functions
620 // RTABI chapter 4.1.2, Table 5
621 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
622 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
623 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
624 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
625 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
626 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
627 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
628
629 // Floating-point to integer conversions.
630 // RTABI chapter 4.1.2, Table 6
631 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
634 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
635 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
636 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
639
640 // Conversions between floating types.
641 // RTABI chapter 4.1.2, Table 7
642 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
643 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
644 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
645
646 // Integer to floating-point conversions.
647 // RTABI chapter 4.1.2, Table 8
648 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
649 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
650 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
651 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
652 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
653 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
654 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
655 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
656
657 // Long long helper functions
658 // RTABI chapter 4.2, Table 9
659 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
660 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
661 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
662 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
663
664 // Integer division functions
665 // RTABI chapter 4.3.1
666 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
667 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
668 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
669 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
670 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
671 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
672 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
673 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
674 };
675
676 for (const auto &LC : LibraryCalls) {
677 setLibcallName(LC.Op, LC.Name);
678 setLibcallCallingConv(LC.Op, LC.CC);
679 if (LC.Cond != ISD::SETCC_INVALID)
680 setCmpLibcallCC(LC.Op, LC.Cond);
681 }
682
683 // EABI dependent RTLIB
684 if (TM.Options.EABIVersion == EABI::EABI4 ||
685 TM.Options.EABIVersion == EABI::EABI5) {
686 static const struct {
687 const RTLIB::Libcall Op;
688 const char *const Name;
689 const CallingConv::ID CC;
690 const ISD::CondCode Cond;
691 } MemOpsLibraryCalls[] = {
692 // Memory operations
693 // RTABI chapter 4.3.4
694 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
695 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
696 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
697 };
698
699 for (const auto &LC : MemOpsLibraryCalls) {
700 setLibcallName(LC.Op, LC.Name);
701 setLibcallCallingConv(LC.Op, LC.CC);
702 if (LC.Cond != ISD::SETCC_INVALID)
703 setCmpLibcallCC(LC.Op, LC.Cond);
704 }
705 }
706 }
707
708 if (Subtarget->isTargetWindows()) {
709 static const struct {
710 const RTLIB::Libcall Op;
711 const char * const Name;
712 const CallingConv::ID CC;
713 } LibraryCalls[] = {
714 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
715 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
716 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
717 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
718 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
719 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
720 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
721 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
722 };
723
724 for (const auto &LC : LibraryCalls) {
725 setLibcallName(LC.Op, LC.Name);
726 setLibcallCallingConv(LC.Op, LC.CC);
727 }
728 }
729
730 // Use divmod compiler-rt calls for iOS 5.0 and later.
731 if (Subtarget->isTargetMachO() &&
732 !(Subtarget->isTargetIOS() &&
733 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
734 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
735 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
736 }
737
738 // The half <-> float conversion functions are always soft-float on
739 // non-watchOS platforms, but are needed for some targets which use a
740 // hard-float calling convention by default.
741 if (!Subtarget->isTargetWatchABI()) {
742 if (Subtarget->isAAPCS_ABI()) {
743 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
744 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
745 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
746 } else {
747 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
748 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
749 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
750 }
751 }
752
753 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
754 // a __gnu_ prefix (which is the default).
755 if (Subtarget->isTargetAEABI()) {
756 static const struct {
757 const RTLIB::Libcall Op;
758 const char * const Name;
759 const CallingConv::ID CC;
760 } LibraryCalls[] = {
761 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
762 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
763 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
764 };
765
766 for (const auto &LC : LibraryCalls) {
767 setLibcallName(LC.Op, LC.Name);
768 setLibcallCallingConv(LC.Op, LC.CC);
769 }
770 }
771
772 if (Subtarget->isThumb1Only())
773 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
774 else
775 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
776
777 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
778 Subtarget->hasFPRegs()) {
779 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
780 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
781
786
787 if (!Subtarget->hasVFP2Base())
788 setAllExpand(MVT::f32);
789 if (!Subtarget->hasFP64())
790 setAllExpand(MVT::f64);
791 }
792
793 if (Subtarget->hasFullFP16()) {
794 addRegisterClass(MVT::f16, &ARM::HPRRegClass);
797
800 }
801
802 if (Subtarget->hasBF16()) {
803 addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
804 setAllExpand(MVT::bf16);
805 if (!Subtarget->hasFullFP16())
807 } else {
810 }
811
813 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
814 setTruncStoreAction(VT, InnerVT, Expand);
815 addAllExtLoads(VT, InnerVT, Expand);
816 }
817
820
822 }
823
826
829
830 if (Subtarget->hasMVEIntegerOps())
831 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
832
833 // Combine low-overhead loop intrinsics so that we can lower i1 types.
834 if (Subtarget->hasLOB()) {
836 }
837
838 if (Subtarget->hasNEON()) {
839 addDRTypeForNEON(MVT::v2f32);
840 addDRTypeForNEON(MVT::v8i8);
841 addDRTypeForNEON(MVT::v4i16);
842 addDRTypeForNEON(MVT::v2i32);
843 addDRTypeForNEON(MVT::v1i64);
844
845 addQRTypeForNEON(MVT::v4f32);
846 addQRTypeForNEON(MVT::v2f64);
847 addQRTypeForNEON(MVT::v16i8);
848 addQRTypeForNEON(MVT::v8i16);
849 addQRTypeForNEON(MVT::v4i32);
850 addQRTypeForNEON(MVT::v2i64);
851
852 if (Subtarget->hasFullFP16()) {
853 addQRTypeForNEON(MVT::v8f16);
854 addDRTypeForNEON(MVT::v4f16);
855 }
856
857 if (Subtarget->hasBF16()) {
858 addQRTypeForNEON(MVT::v8bf16);
859 addDRTypeForNEON(MVT::v4bf16);
860 }
861 }
862
863 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
864 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
865 // none of Neon, MVE or VFP supports any arithmetic operations on it.
866 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
867 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
868 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
869 // FIXME: Code duplication: FDIV and FREM are expanded always, see
870 // ARMTargetLowering::addTypeForNEON method for details.
871 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
872 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
873 // FIXME: Create unittest.
874 // In other words, find a case where "copysign" appears in the DAG with
875 // vector operands.
877 // FIXME: Code duplication: SETCC has custom operation action, see
878 // ARMTargetLowering::addTypeForNEON method for details.
880 // FIXME: Create unittest for FNEG and for FABS.
881 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
882 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
884 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
885 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
886 setOperationAction(ISD::FTAN, MVT::v2f64, Expand);
887 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
888 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
891 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
894 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
900 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
901 }
902
903 if (Subtarget->hasNEON()) {
904 // The same applies to v4f32, but keep in mind that vadd, vsub and vmul are
905 // natively supported for v4f32.
907 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
908 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
909 setOperationAction(ISD::FTAN, MVT::v4f32, Expand);
910 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
911 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
914 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
922
923 // Mark v2f32 intrinsics.
925 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
926 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
927 setOperationAction(ISD::FTAN, MVT::v2f32, Expand);
928 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
929 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
932 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
940
941 // Neon does not support some operations on v1i64 and v2i64 types.
942 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
943 // Custom handling for some quad-vector types to detect VMULL.
944 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
945 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
946 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
947 // Custom handling for some vector types to avoid expensive expansions
948 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
950 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
952 // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
953 // a destination type that is wider than the source, nor does it have
954 // an FP_TO_[SU]INT instruction with a narrower destination than its
955 // source.
964
967
968 // NEON does not have a single-instruction CTPOP for vectors with element
969 // types wider than 8 bits. However, custom lowering can leverage the
970 // v8i8/v16i8 vcnt instruction.
977
978 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
979 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
980
981 // NEON does not have a single-instruction CTTZ for vectors.
983 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
984 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
985 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
986
987 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
988 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
989 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
990 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
991
996
1001
1005 }
1006
1007 // NEON only has FMA instructions as of VFP4.
1008 if (!Subtarget->hasVFP4Base()) {
1009 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
1010 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
1011 }
1012
1015
1016 // It is legal to extload from v4i8 to v4i16 or v4i32.
1017 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
1018 MVT::v2i32}) {
1023 }
1024 }
1025
1026 for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1027 MVT::v4i32}) {
1032 }
1033 }
1034
1035 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1042 }
1043 if (Subtarget->hasMVEIntegerOps()) {
1046 ISD::SETCC});
1047 }
1048 if (Subtarget->hasMVEFloatOps()) {
1050 }
1051
1052 if (!Subtarget->hasFP64()) {
1053 // When targeting a floating-point unit with only single-precision
1054 // operations, f64 is legal for the few double-precision instructions which
1055 // are present. However, no double-precision operations other than moves,
1056 // loads and stores are provided by the hardware.
1094 }
1095
1096 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1099 if (Subtarget->hasFullFP16()) {
1102 }
1103 }
1104
1105 if (!Subtarget->hasFP16()) {
1108 }
1109
1111
1112 // ARM does not have floating-point extending loads.
1113 for (MVT VT : MVT::fp_valuetypes()) {
1114 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1115 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
1116 setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
1117 }
1118
1119 // ... or truncating stores
1120 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1121 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
1122 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1123 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
1124 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
1125
1126 // ARM does not have i1 sign-extending loads.
1127 for (MVT VT : MVT::integer_valuetypes())
1128 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
1129
1130 // ARM supports all 4 flavors of integer indexed load / store.
1131 if (!Subtarget->isThumb1Only()) {
1132 for (unsigned im = (unsigned)ISD::PRE_INC;
1134 setIndexedLoadAction(im, MVT::i1, Legal);
1135 setIndexedLoadAction(im, MVT::i8, Legal);
1136 setIndexedLoadAction(im, MVT::i16, Legal);
1137 setIndexedLoadAction(im, MVT::i32, Legal);
1138 setIndexedStoreAction(im, MVT::i1, Legal);
1139 setIndexedStoreAction(im, MVT::i8, Legal);
1140 setIndexedStoreAction(im, MVT::i16, Legal);
1141 setIndexedStoreAction(im, MVT::i32, Legal);
1142 }
1143 } else {
1144 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1147 }
1148
1153
1156 if (Subtarget->hasDSP()) {
1165 }
1166 if (Subtarget->hasBaseDSP()) {
1169 }
1170
1171 // i64 operation support.
1174 if (Subtarget->isThumb1Only()) {
1177 }
1178 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1179 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1181
1191
1192 // MVE lowers 64-bit shifts to lsll and lsrl,
1193 // assuming that ISD::SRL and SRA of i64 are already marked custom.
1194 if (Subtarget->hasMVEIntegerOps())
1196
1197 // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1198 if (Subtarget->isThumb1Only()) {
1202 }
1203
1204 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1206
1207 // ARM does not have ROTL.
1212 }
1215 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1218 }
1219
1220 // @llvm.readcyclecounter requires the Performance Monitors extension.
1221 // Default to the 0 expansion on unsupported platforms.
1222 // FIXME: Technically there are older ARM CPUs that have
1223 // implementation-specific ways of obtaining this information.
1224 if (Subtarget->hasPerfMon())
1226
1227 // BSWAP (REV) is only available from ARMv6 onwards.
1228 if (!Subtarget->hasV6Ops())
1230
1231 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1232 : Subtarget->hasDivideInARMMode();
1233 if (!hasDivide) {
1234 // These are expanded into libcalls if the CPU doesn't have a HW divider.
1237 }
1238
1239 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1242
1245 }
1246
1249
1250 // Register based DivRem for AEABI (RTABI 4.2)
1251 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1252 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1253 Subtarget->isTargetWindows()) {
1256 HasStandaloneRem = false;
1257
1258 if (Subtarget->isTargetWindows()) {
1259 const struct {
1260 const RTLIB::Libcall Op;
1261 const char * const Name;
1262 const CallingConv::ID CC;
1263 } LibraryCalls[] = {
1264 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1265 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1266 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1267 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1268
1269 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1270 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1271 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1272 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1273 };
1274
1275 for (const auto &LC : LibraryCalls) {
1276 setLibcallName(LC.Op, LC.Name);
1277 setLibcallCallingConv(LC.Op, LC.CC);
1278 }
1279 } else {
1280 const struct {
1281 const RTLIB::Libcall Op;
1282 const char * const Name;
1283 const CallingConv::ID CC;
1284 } LibraryCalls[] = {
1285 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1286 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1287 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1288 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1289
1290 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1291 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1292 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1293 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1294 };
1295
1296 for (const auto &LC : LibraryCalls) {
1297 setLibcallName(LC.Op, LC.Name);
1298 setLibcallCallingConv(LC.Op, LC.CC);
1299 }
1300 }
1301
1306 } else {
1309 }
1310
1315
1316 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1318
1319 // Use the default implementation.
1321 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1323 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1326
1327 if (Subtarget->isTargetWindows())
1329 else
1331
1332 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1333 // the default expansion.
1334 InsertFencesForAtomic = false;
1335 if (Subtarget->hasAnyDataBarrier() &&
1336 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1337 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1338 // to ldrex/strex loops already.
1340 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1342
1343 // On v8, we have particularly efficient implementations of atomic fences
1344 // if they can be combined with nearby atomic loads and stores.
1345 if (!Subtarget->hasAcquireRelease() ||
1346 getTargetMachine().getOptLevel() == CodeGenOptLevel::None) {
1347 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1348 InsertFencesForAtomic = true;
1349 }
1350 } else {
1351 // If there's anything we can use as a barrier, go through custom lowering
1352 // for ATOMIC_FENCE.
1353 // If target has DMB in thumb, Fences can be inserted.
1354 if (Subtarget->hasDataBarrier())
1355 InsertFencesForAtomic = true;
1356
1358 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1359
1360 // Set them all for libcall, which will force libcalls.
1373 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1374 // Unordered/Monotonic case.
1375 if (!InsertFencesForAtomic) {
1378 }
1379 }
1380
1381 // Compute supported atomic widths.
1382 if (Subtarget->isTargetLinux() ||
1383 (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1384 // For targets where __sync_* routines are reliably available, we use them
1385 // if necessary.
1386 //
1387 // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1388 // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1389 //
1390 // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
1391 // such targets should provide __sync_* routines, which use the ARM mode
1392 // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
1393 // encoding; see ARMISD::MEMBARRIER_MCR.)
1395 } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1396 Subtarget->hasForced32BitAtomics()) {
1397 // Cortex-M (besides Cortex-M0) have 32-bit atomics.
1399 } else {
1400 // We can't assume anything about other targets; just use libatomic
1401 // routines.
1403 }
1404
1406
1408
1409 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1410 if (!Subtarget->hasV6Ops()) {
1413 }
1415
1416 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1417 !Subtarget->isThumb1Only()) {
1418 // Turn f64 -> i64 into VMOVRRD and i64 -> f64 into VMOVDRR,
1419 // iff the target supports VFP2.
1429 }
1430
1431 // We want to custom lower some of our intrinsics.
1436 if (Subtarget->useSjLjEH())
1437 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1438
1448 if (Subtarget->hasFullFP16()) {
1452 }
1453
1455
1458 if (Subtarget->hasFullFP16())
1462 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1463
1464 // We don't support sin/cos/fmod/copysign/pow
1473 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1474 !Subtarget->isThumb1Only()) {
1477 }
1480
1481 if (!Subtarget->hasVFP4Base()) {
1484 }
1485
1486 // Various VFP goodness
1487 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1488 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1489 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1492 }
1493
1494 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1495 if (!Subtarget->hasFP16()) {
1498 }
1499
1500 // Strict floating-point comparisons need custom lowering.
1507 }
1508
1509 // Use __sincos_stret if available.
1510 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1511 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1514 }
1515
1516 // FP-ARMv8 implements a lot of rounding-like FP operations.
1517 if (Subtarget->hasFPARMv8Base()) {
1526 if (Subtarget->hasNEON()) {
1531 }
1532
1533 if (Subtarget->hasFP64()) {
1542 }
1543 }
1544
1545 // FP16 often needs to be promoted to call library functions.
1546 if (Subtarget->hasFullFP16()) {
1561
1563 }
1564
1565 if (Subtarget->hasNEON()) {
1566 // vmin and vmax aren't available in a scalar form, so we can use
1567 // a NEON instruction with an undef lane instead.
1576
1577 if (Subtarget->hasFullFP16()) {
1582
1587 }
1588 }
1589
1590 // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
1591 // it, but it's just a wrapper around ldexp.
1592 if (Subtarget->isTargetWindows()) {
1594 if (isOperationExpand(Op, MVT::f32))
1595 setOperationAction(Op, MVT::f32, Promote);
1596 }
1597
1598 // LegalizeDAG currently can't expand fp16 LDEXP/FREXP on targets where i16
1599 // isn't legal.
1601 if (isOperationExpand(Op, MVT::f16))
1602 setOperationAction(Op, MVT::f16, Promote);
1603
1604 // We have target-specific DAG combine patterns for the following nodes:
1605 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1608
1609 if (Subtarget->hasMVEIntegerOps())
1611
1612 if (Subtarget->hasV6Ops())
1614 if (Subtarget->isThumb1Only())
1616 // Attempt to lower smin/smax to ssat/usat
1617 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1618 Subtarget->isThumb2()) {
1620 }
1621
1623
1624 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1625 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1627 else
1629
1630 //// temporary - rewrite interface to use type
1633 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1635 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1637
1638 // On ARM, arguments smaller than 4 bytes are extended, so all arguments
1639 // are at least 4-byte aligned.
1641
1642 // Prefer likely predicted branches to selects on out-of-order cores.
1643 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1644
1647 Align(1ULL << Subtarget->getPreferBranchLogAlignment()));
1648
1649 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1650}
1651
1653 return Subtarget->useSoftFloat();
1654}
1655
1656// FIXME: It might make sense to define the representative register class as the
1657// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1658// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1659// SPR's representative would be DPR_VFP2. This should work well if register
1660// pressure tracking were modified such that a register use would increment the
1661// pressure of the register class's representative and all of its super
1662// classes' representatives transitively. We have not implemented this because
1663// of the difficulty prior to coalescing of modeling operand register classes
1664// due to the common occurrence of cross-class copies and subregister insertions
1665// and extractions.
1666std::pair<const TargetRegisterClass *, uint8_t>
1668 MVT VT) const {
1669 const TargetRegisterClass *RRC = nullptr;
1670 uint8_t Cost = 1;
1671 switch (VT.SimpleTy) {
1672 default:
1674 // Use DPR as the representative register class for all floating-point
1675 // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1676 // the cost is 1 for both f32 and f64.
1677 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1678 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1679 RRC = &ARM::DPRRegClass;
1680 // When NEON is used for SP, only half of the register file is available
1681 // because operations that define both SP and DP results will be constrained
1682 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1683 // coalescing by double-counting the SP regs. See the FIXME above.
1684 if (Subtarget->useNEONForSinglePrecisionFP())
1685 Cost = 2;
1686 break;
1687 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1688 case MVT::v4f32: case MVT::v2f64:
1689 RRC = &ARM::DPRRegClass;
1690 Cost = 2;
1691 break;
1692 case MVT::v4i64:
1693 RRC = &ARM::DPRRegClass;
1694 Cost = 4;
1695 break;
1696 case MVT::v8i64:
1697 RRC = &ARM::DPRRegClass;
1698 Cost = 8;
1699 break;
1700 }
1701 return std::make_pair(RRC, Cost);
1702}
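// For example, v4f32 maps to the DPR class with cost 2 above, reflecting that a
// Q register aliases two D registers; v8i64 (a QQQQ quantity) costs 8.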
1703
1704const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1705#define MAKE_CASE(V) \
1706 case V: \
1707 return #V;
1708 switch ((ARMISD::NodeType)Opcode) {
1710 break;
1913#undef MAKE_CASE
1914 }
1915 return nullptr;
1916}
1917
1919 EVT VT) const {
1920 if (!VT.isVector())
1921 return getPointerTy(DL);
1922
1923 // MVE has a predicate register.
1924 if ((Subtarget->hasMVEIntegerOps() &&
1925 (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
1926 VT == MVT::v16i8)) ||
1927 (Subtarget->hasMVEFloatOps() &&
1928 (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
1929 return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1931}
1932
1933/// getRegClassFor - Return the register class that should be used for the
1934/// specified value type.
1935const TargetRegisterClass *
1936ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1937 (void)isDivergent;
1938 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1939 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1940 // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1941 // MVE Q registers.
1942 if (Subtarget->hasNEON()) {
1943 if (VT == MVT::v4i64)
1944 return &ARM::QQPRRegClass;
1945 if (VT == MVT::v8i64)
1946 return &ARM::QQQQPRRegClass;
1947 }
1948 if (Subtarget->hasMVEIntegerOps()) {
1949 if (VT == MVT::v4i64)
1950 return &ARM::MQQPRRegClass;
1951 if (VT == MVT::v8i64)
1952 return &ARM::MQQQQPRRegClass;
1953 }
1955}
1956
1957 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1958// source/dest is aligned and the copy size is large enough. We therefore want
1959// to align such objects passed to memory intrinsics.
1961 Align &PrefAlign) const {
1962 if (!isa<MemIntrinsic>(CI))
1963 return false;
1964 MinSize = 8;
1965 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1966 // cycle faster than 4-byte aligned LDM.
1967 PrefAlign =
1968 (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
1969 return true;
1970}
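// Rough illustration of the effect (the re-alignment itself is done by the
// generic caller of this hook, e.g. CodeGenPrepare, not here): an object of at
// least MinSize bytes passed to a memory intrinsic may have its alignment
// raised to PrefAlign so the expansion can use LDM/STM, e.g.
//   call void @llvm.memcpy.p0.p0.i32(ptr align 8 %dst, ptr align 8 %src,
//                                    i32 32, i1 false)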
1971
1972// Create a fast isel object.
1973FastISel *
1975 const TargetLibraryInfo *libInfo) const {
1976 return ARM::createFastISel(funcInfo, libInfo);
1977}
1978
1980 unsigned NumVals = N->getNumValues();
1981 if (!NumVals)
1982 return Sched::RegPressure;
1983
1984 for (unsigned i = 0; i != NumVals; ++i) {
1985 EVT VT = N->getValueType(i);
1986 if (VT == MVT::Glue || VT == MVT::Other)
1987 continue;
1988 if (VT.isFloatingPoint() || VT.isVector())
1989 return Sched::ILP;
1990 }
1991
1992 if (!N->isMachineOpcode())
1993 return Sched::RegPressure;
1994
1995 // Loads are scheduled for latency even if the instruction itinerary
1996 // is not available.
1997 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1998 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1999
2000 if (MCID.getNumDefs() == 0)
2001 return Sched::RegPressure;
2002 if (!Itins->isEmpty() &&
2003 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2U)
2004 return Sched::ILP;
2005
2006 return Sched::RegPressure;
2007}
2008
2009//===----------------------------------------------------------------------===//
2010// Lowering Code
2011//===----------------------------------------------------------------------===//
2012
2013static bool isSRL16(const SDValue &Op) {
2014 if (Op.getOpcode() != ISD::SRL)
2015 return false;
2016 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2017 return Const->getZExtValue() == 16;
2018 return false;
2019}
2020
2021static bool isSRA16(const SDValue &Op) {
2022 if (Op.getOpcode() != ISD::SRA)
2023 return false;
2024 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2025 return Const->getZExtValue() == 16;
2026 return false;
2027}
2028
2029static bool isSHL16(const SDValue &Op) {
2030 if (Op.getOpcode() != ISD::SHL)
2031 return false;
2032 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2033 return Const->getZExtValue() == 16;
2034 return false;
2035}
2036
2037 // Check for a signed 16-bit value. We special-case SRA because it makes it
2038 // simpler when also looking for SRAs that aren't sign-extending a
2039 // smaller value. Without the check, we'd need to take extra care with
2040 // checking order for some operations.
2041static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
2042 if (isSRA16(Op))
2043 return isSHL16(Op.getOperand(0));
2044 return DAG.ComputeNumSignBits(Op) == 17;
2045}
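// Example: isS16 holds for (sra (shl x, 16), 16), and more generally when
// ComputeNumSignBits reports 17 sign bits on a 32-bit value, i.e. the top 17
// bits are copies of the sign bit and the value behaves like a sign-extended
// i16, which lets later combines treat it as a 16-bit operand.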
2046
2047/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
2049 switch (CC) {
2050 default: llvm_unreachable("Unknown condition code!");
2051 case ISD::SETNE: return ARMCC::NE;
2052 case ISD::SETEQ: return ARMCC::EQ;
2053 case ISD::SETGT: return ARMCC::GT;
2054 case ISD::SETGE: return ARMCC::GE;
2055 case ISD::SETLT: return ARMCC::LT;
2056 case ISD::SETLE: return ARMCC::LE;
2057 case ISD::SETUGT: return ARMCC::HI;
2058 case ISD::SETUGE: return ARMCC::HS;
2059 case ISD::SETULT: return ARMCC::LO;
2060 case ISD::SETULE: return ARMCC::LS;
2061 }
2062}
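// For example, lowering (setcc ult x, y) through this table yields ARMCC::LO,
// which ends up as a predicated instruction such as (illustrative assembly, not
// emitted by this function itself):
//   cmp   r0, r1
//   movlo r0, #1   @ executes only when the "unsigned lower" condition holds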
2063
2064/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
2066 ARMCC::CondCodes &CondCode2) {
2067 CondCode2 = ARMCC::AL;
2068 switch (CC) {
2069 default: llvm_unreachable("Unknown FP condition!");
2070 case ISD::SETEQ:
2071 case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
2072 case ISD::SETGT:
2073 case ISD::SETOGT: CondCode = ARMCC::GT; break;
2074 case ISD::SETGE:
2075 case ISD::SETOGE: CondCode = ARMCC::GE; break;
2076 case ISD::SETOLT: CondCode = ARMCC::MI; break;
2077 case ISD::SETOLE: CondCode = ARMCC::LS; break;
2078 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
2079 case ISD::SETO: CondCode = ARMCC::VC; break;
2080 case ISD::SETUO: CondCode = ARMCC::VS; break;
2081 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
2082 case ISD::SETUGT: CondCode = ARMCC::HI; break;
2083 case ISD::SETUGE: CondCode = ARMCC::PL; break;
2084 case ISD::SETLT:
2085 case ISD::SETULT: CondCode = ARMCC::LT; break;
2086 case ISD::SETLE:
2087 case ISD::SETULE: CondCode = ARMCC::LE; break;
2088 case ISD::SETNE:
2089 case ISD::SETUNE: CondCode = ARMCC::NE; break;
2090 }
2091}
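// Note the two-code cases: the ARM FP flags cannot express e.g. "ordered and
// not equal" (SETONE) with a single condition, so the lowering tests MI and
// then GT and combines them; conceptually (pseudo-code):
//   bool Taken = CondHolds(ARMCC::MI) || CondHolds(ARMCC::GT);
// Single-code cases need just one predicated instruction or branch.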
2092
2093//===----------------------------------------------------------------------===//
2094// Calling Convention Implementation
2095//===----------------------------------------------------------------------===//
2096
2097/// getEffectiveCallingConv - Get the effective calling convention, taking into
2098/// account presence of floating point hardware and calling convention
2099/// limitations, such as support for variadic functions.
2101ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
2102 bool isVarArg) const {
2103 switch (CC) {
2104 default:
2105 report_fatal_error("Unsupported calling convention");
2108 case CallingConv::GHC:
2110 return CC;
2116 case CallingConv::Swift:
2119 case CallingConv::C:
2120 case CallingConv::Tail:
2121 if (!Subtarget->isAAPCS_ABI())
2122 return CallingConv::ARM_APCS;
2123 else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
2124 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2125 !isVarArg)
2127 else
2129 case CallingConv::Fast:
2131 if (!Subtarget->isAAPCS_ABI()) {
2132 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2133 return CallingConv::Fast;
2134 return CallingConv::ARM_APCS;
2135 } else if (Subtarget->hasVFP2Base() &&
2136 !Subtarget->isThumb1Only() && !isVarArg)
2138 else
2140 }
2141}
2142
2144 bool isVarArg) const {
2145 return CCAssignFnForNode(CC, false, isVarArg);
2146}
2147
2149 bool isVarArg) const {
2150 return CCAssignFnForNode(CC, true, isVarArg);
2151}
2152
2153/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2154/// CallingConvention.
2155CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2156 bool Return,
2157 bool isVarArg) const {
2158 switch (getEffectiveCallingConv(CC, isVarArg)) {
2159 default:
2160 report_fatal_error("Unsupported calling convention");
2162 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2164 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2166 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2167 case CallingConv::Fast:
2168 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2169 case CallingConv::GHC:
2170 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2172 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2174 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2176 return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2177 }
2178}
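// Example of how the two hooks above compose: on a hard-float AAPCS target, a
// non-variadic C call resolves to CallingConv::ARM_AAPCS_VFP, so argument
// assignment uses CC_ARM_AAPCS_VFP and FP arguments land in s0-s15 / d0-d7
// rather than r0-r3 (a sketch of the common case, not of every convention
// handled here).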
2179
2180SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2181 MVT LocVT, MVT ValVT, SDValue Val) const {
2182 Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2183 Val);
2184 if (Subtarget->hasFullFP16()) {
2185 Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2186 } else {
2187 Val = DAG.getNode(ISD::TRUNCATE, dl,
2188 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2189 Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2190 }
2191 return Val;
2192}
2193
2194SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2195 MVT LocVT, MVT ValVT,
2196 SDValue Val) const {
2197 if (Subtarget->hasFullFP16()) {
2198 Val = DAG.getNode(ARMISD::VMOVrh, dl,
2199 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2200 } else {
2201 Val = DAG.getNode(ISD::BITCAST, dl,
2202 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2203 Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2204 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2205 }
2206 return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2207}
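// Taken together, MoveToHPR/MoveFromHPR implement the rule that an f16/bf16
// value travels in the low 16 bits of a 32-bit location. A rough sketch of the
// conversions without +fullfp16 (types only):
//   MoveFromHPR: f16 -> bitcast i16 -> zext i32 -> bitcast to LocVT (e.g. f32)
//   MoveToHPR:   LocVT -> bitcast i32 -> trunc i16 -> bitcast f16
// With +fullfp16 the VMOVrh/VMOVhr nodes perform the move directly.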
2208
2209/// LowerCallResult - Lower the result values of a call into the
2210/// appropriate copies out of appropriate physical registers.
2211SDValue ARMTargetLowering::LowerCallResult(
2212 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
2213 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2214 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2215 SDValue ThisVal, bool isCmseNSCall) const {
2216 // Assign locations to each value returned by this call.
2218 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2219 *DAG.getContext());
2220 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2221
2222 // Copy all of the result registers out of their specified physreg.
2223 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2224 CCValAssign VA = RVLocs[i];
2225
2226 // Pass 'this' value directly from the argument to return value, to avoid
2227 // reg unit interference
2228 if (i == 0 && isThisReturn) {
2229 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2230 "unexpected return calling convention register assignment");
2231 InVals.push_back(ThisVal);
2232 continue;
2233 }
2234
2235 SDValue Val;
2236 if (VA.needsCustom() &&
2237 (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2238 // Handle f64 or half of a v2f64.
2239 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2240 InGlue);
2241 Chain = Lo.getValue(1);
2242 InGlue = Lo.getValue(2);
2243 VA = RVLocs[++i]; // skip ahead to next loc
2244 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2245 InGlue);
2246 Chain = Hi.getValue(1);
2247 InGlue = Hi.getValue(2);
2248 if (!Subtarget->isLittle())
2249 std::swap (Lo, Hi);
2250 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2251
2252 if (VA.getLocVT() == MVT::v2f64) {
2253 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2254 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2255 DAG.getConstant(0, dl, MVT::i32));
2256
2257 VA = RVLocs[++i]; // skip ahead to next loc
2258 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2259 Chain = Lo.getValue(1);
2260 InGlue = Lo.getValue(2);
2261 VA = RVLocs[++i]; // skip ahead to next loc
2262 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2263 Chain = Hi.getValue(1);
2264 InGlue = Hi.getValue(2);
2265 if (!Subtarget->isLittle())
2266 std::swap (Lo, Hi);
2267 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2268 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2269 DAG.getConstant(1, dl, MVT::i32));
2270 }
2271 } else {
2272 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2273 InGlue);
2274 Chain = Val.getValue(1);
2275 InGlue = Val.getValue(2);
2276 }
2277
2278 switch (VA.getLocInfo()) {
2279 default: llvm_unreachable("Unknown loc info!");
2280 case CCValAssign::Full: break;
2281 case CCValAssign::BCvt:
2282 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2283 break;
2284 }
2285
2286 // f16 arguments have their size extended to 4 bytes and are passed as if
2287 // they had been copied to the LSBs of a 32-bit register.
2288 // For that, the value is passed extended to i32 (soft ABI) or to f32 (hard ABI).
2289 if (VA.needsCustom() &&
2290 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2291 Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2292
2293 // On CMSE Non-secure Calls, call results (returned values) whose bitwidth
2294 // is less than 32 bits must be sign- or zero-extended after the call for
2295 // security reasons. Although the ABI mandates an extension done by the
2296 // callee, the latter cannot be trusted to follow the rules of the ABI.
2297 const ISD::InputArg &Arg = Ins[VA.getValNo()];
2298 if (isCmseNSCall && Arg.ArgVT.isScalarInteger() &&
2299 VA.getLocVT().isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))
2300 Val = handleCMSEValue(Val, Arg, DAG, dl);
2301
2302 InVals.push_back(Val);
2303 }
2304
2305 return Chain;
2306}
2307
2308std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2309 const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
2310 bool IsTailCall, int SPDiff) const {
2311 SDValue DstAddr;
2312 MachinePointerInfo DstInfo;
2313 int32_t Offset = VA.getLocMemOffset();
2315
2316 if (IsTailCall) {
2317 Offset += SPDiff;
2318 auto PtrVT = getPointerTy(DAG.getDataLayout());
2319 int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2320 int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2321 DstAddr = DAG.getFrameIndex(FI, PtrVT);
2322 DstInfo =
2324 } else {
2325 SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
2326 DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2327 StackPtr, PtrOff);
2328 DstInfo =
2330 }
2331
2332 return std::make_pair(DstAddr, DstInfo);
2333}
2334
2335// Returns the type of copying which is required to set up a byval argument to
2336// a tail-called function. This isn't needed for non-tail calls, because they
2337// always need the equivalent of CopyOnce, but tail-calls sometimes need two copies to
2338// avoid clobbering another argument (CopyViaTemp), and sometimes can be
2339// optimised to zero copies when forwarding an argument from the caller's
2340// caller (NoCopy).
2341ARMTargetLowering::ByValCopyKind ARMTargetLowering::ByValNeedsCopyForTailCall(
2342 SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const {
2345
2346 // Globals are always safe to copy from.
2347 if (isa<GlobalAddressSDNode>(Src) || isa<ExternalSymbolSDNode>(Src))
2348 return CopyOnce;
2349
2350 // Can only analyse frame index nodes, conservatively assume we need a
2351 // temporary.
2352 auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src);
2353 auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst);
2354 if (!SrcFrameIdxNode || !DstFrameIdxNode)
2355 return CopyViaTemp;
2356
2357 int SrcFI = SrcFrameIdxNode->getIndex();
2358 int DstFI = DstFrameIdxNode->getIndex();
2359 assert(MFI.isFixedObjectIndex(DstFI) &&
2360 "byval passed in non-fixed stack slot");
2361
2362 int64_t SrcOffset = MFI.getObjectOffset(SrcFI);
2363 int64_t DstOffset = MFI.getObjectOffset(DstFI);
2364
2365 // If the source is in the local frame, then the copy to the argument memory
2366 // is always valid.
2367 bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
2368 if (!FixedSrc ||
2369 (FixedSrc && SrcOffset < -(int64_t)AFI->getArgRegsSaveSize()))
2370 return CopyOnce;
2371
2372 // In the case of byval arguments split between registers and the stack,
2373 // computeAddrForCallArg returns a FrameIndex which corresponds only to the
2374 // stack portion, but the Src SDValue will refer to the full value, including
2375 // the local stack memory that the register portion gets stored into. We only
2376 // need to compare them for equality, so normalise on the full value version.
2377 uint64_t RegSize = Flags.getByValSize() - MFI.getObjectSize(DstFI);
2378 DstOffset -= RegSize;
2379
2380 // If the value is already in the correct location, then no copying is
2381 // needed. If not, then we need to copy via a temporary.
2382 if (SrcOffset == DstOffset)
2383 return NoCopy;
2384 else
2385 return CopyViaTemp;
2386}
2387
2388void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2389 SDValue Chain, SDValue &Arg,
2390 RegsToPassVector &RegsToPass,
2391 CCValAssign &VA, CCValAssign &NextVA,
2392 SDValue &StackPtr,
2393 SmallVectorImpl<SDValue> &MemOpChains,
2394 bool IsTailCall,
2395 int SPDiff) const {
2396 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2397 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2398 unsigned id = Subtarget->isLittle() ? 0 : 1;
2399 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2400
2401 if (NextVA.isRegLoc())
2402 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2403 else {
2404 assert(NextVA.isMemLoc());
2405 if (!StackPtr.getNode())
2406 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2407 getPointerTy(DAG.getDataLayout()));
2408
2409 SDValue DstAddr;
2410 MachinePointerInfo DstInfo;
2411 std::tie(DstAddr, DstInfo) =
2412 computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2413 MemOpChains.push_back(
2414 DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2415 }
2416}
2417
2418static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2419 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2420 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
2421 }
2422
2423/// LowerCall - Lowering a call into a callseq_start <-
2424 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
2425/// nodes.
2426SDValue
2427ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2428 SmallVectorImpl<SDValue> &InVals) const {
2429 SelectionDAG &DAG = CLI.DAG;
2430 SDLoc &dl = CLI.DL;
2431 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2432 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2433 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2434 SDValue Chain = CLI.Chain;
2435 SDValue Callee = CLI.Callee;
2436 bool &isTailCall = CLI.IsTailCall;
2437 CallingConv::ID CallConv = CLI.CallConv;
2438 bool doesNotRet = CLI.DoesNotReturn;
2439 bool isVarArg = CLI.IsVarArg;
2440
2441 MachineFunction &MF = DAG.getMachineFunction();
2442 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2443 MachineFrameInfo &MFI = MF.getFrameInfo();
2444 MachineFunction::CallSiteInfo CSInfo;
2445 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2446 bool isThisReturn = false;
2447 bool isCmseNSCall = false;
2448 bool isSibCall = false;
2449 bool PreferIndirect = false;
2450 bool GuardWithBTI = false;
2451
2452 // Analyze operands of the call, assigning locations to each operand.
2453 SmallVector<CCValAssign, 16> ArgLocs;
2454 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2455 *DAG.getContext());
2456 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2457
2458 // Lower 'returns_twice' calls to a pseudo-instruction.
2459 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2460 !Subtarget->noBTIAtReturnTwice())
2461 GuardWithBTI = AFI->branchTargetEnforcement();
2462
2463 // Determine whether this is a non-secure function call.
2464 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2465 isCmseNSCall = true;
2466
2467 // Disable tail calls if they're not supported.
2468 if (!Subtarget->supportsTailCall())
2469 isTailCall = false;
2470
2471 // For both the non-secure calls and the returns from a CMSE entry function,
2472 // the function needs to do some extra work after the call, or before the
2473 // return, respectively, thus it cannot end with a tail call
2474 if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2475 isTailCall = false;
2476
2477 if (isa<GlobalAddressSDNode>(Callee)) {
2478 // If we're optimizing for minimum size and the function is called three or
2479 // more times in this block, we can improve codesize by calling indirectly
2480 // as BLXr has a 16-bit encoding.
2481 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2482 if (CLI.CB) {
2483 auto *BB = CLI.CB->getParent();
2484 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2485 count_if(GV->users(), [&BB](const User *U) {
2486 return isa<Instruction>(U) &&
2487 cast<Instruction>(U)->getParent() == BB;
2488 }) > 2;
2489 }
2490 }
2491 if (isTailCall) {
2492 // Check if it's really possible to do a tail call.
2493 isTailCall =
2494 IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, PreferIndirect);
2495
2496 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2497 CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2498 isSibCall = true;
2499
2500 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2501 // detected sibcalls.
2502 if (isTailCall)
2503 ++NumTailCalls;
2504 }
2505
2506 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2507 report_fatal_error("failed to perform tail call elimination on a call "
2508 "site marked musttail");
2509
2510 // Get a count of how many bytes are to be pushed on the stack.
2511 unsigned NumBytes = CCInfo.getStackSize();
2512
2513 // SPDiff is the byte offset of the call's argument area from the callee's.
2514 // Stores to callee stack arguments will be placed in FixedStackSlots offset
2515 // by this amount for a tail call. In a sibling call it must be 0 because the
2516 // caller will deallocate the entire stack and the callee still expects its
2517 // arguments to begin at SP+0. Completely unused for non-tail calls.
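// For example, if this function reserved 16 bytes of incoming argument space
// and the tail-called function needs 32 bytes of (16-byte aligned) outgoing
// arguments, then SPDiff = 16 - 32 = -16 and setArgRegsSaveSize below tells
// FrameLowering to reserve the extra 16 bytes.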
2518 int SPDiff = 0;
2519
2520 if (isTailCall && !isSibCall) {
2521 auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2522 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2523
2524 // Since callee will pop argument stack as a tail call, we must keep the
2525 // popped size 16-byte aligned.
2526 MaybeAlign StackAlign = DAG.getDataLayout().getStackAlignment();
2527 assert(StackAlign && "data layout string is missing stack alignment");
2528 NumBytes = alignTo(NumBytes, *StackAlign);
2529
2530 // SPDiff will be negative if this tail call requires more space than we
2531 // would automatically have in our incoming argument space. Positive if we
2532 // can actually shrink the stack.
2533 SPDiff = NumReusableBytes - NumBytes;
2534
2535 // If this call requires more stack than we have available from
2536 // LowerFormalArguments, tell FrameLowering to reserve space for it.
2537 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2538 AFI->setArgRegsSaveSize(-SPDiff);
2539 }
2540
2541 if (isSibCall) {
2542 // For sibling tail calls, memory operands are available in our caller's stack.
2543 NumBytes = 0;
2544 } else {
2545 // Adjust the stack pointer for the new arguments...
2546 // These operations are automatically eliminated by the prolog/epilog pass
2547 Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2548 }
2549
2550 SDValue StackPtr =
2551 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2552
2553 RegsToPassVector RegsToPass;
2554 SmallVector<SDValue, 8> MemOpChains;
2555
2556 // If we are doing a tail-call, any byval arguments will be written to stack
2557 // space which was used for incoming arguments. If any of the values being used
2558 // are incoming byval arguments to this function, then they might be
2559 // overwritten by the stores of the outgoing arguments. To avoid this, we
2560 // need to make a temporary copy of them in local stack space, then copy back
2561 // to the argument area.
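// For example, if this function received a byval struct at [sp, #0] and the
// tail call places a different byval value at that same offset, storing the
// outgoing copy directly would clobber the incoming struct before it is read,
// so the outgoing value is staged in a fresh local stack object first.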
2562 DenseMap<unsigned, SDValue> ByValTemporaries;
2563 SDValue ByValTempChain;
2564 if (isTailCall) {
2565 SmallVector<SDValue, 8> ByValCopyChains;
2566 for (const CCValAssign &VA : ArgLocs) {
2567 unsigned ArgIdx = VA.getValNo();
2568 SDValue Src = OutVals[ArgIdx];
2569 ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;
2570
2571 if (!Flags.isByVal())
2572 continue;
2573
2574 SDValue Dst;
2575 MachinePointerInfo DstInfo;
2576 std::tie(Dst, DstInfo) =
2577 computeAddrForCallArg(dl, DAG, VA, SDValue(), true, SPDiff);
2578 ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);
2579
2580 if (Copy == NoCopy) {
2581 // If the argument is already at the correct offset on the stack
2582 // (because we are forwarding a byval argument from our caller), we
2583 // don't need any copying.
2584 continue;
2585 } else if (Copy == CopyOnce) {
2586 // If the argument is in our local stack frame, no other argument
2587 // preparation can clobber it, so we can copy it to the final location
2588 // later.
2589 ByValTemporaries[ArgIdx] = Src;
2590 } else {
2591 assert(Copy == CopyViaTemp && "unexpected enum value");
2592 // If we might be copying this argument from the outgoing argument
2593 // stack area, we need to copy via a temporary in the local stack
2594 // frame.
2595 int TempFrameIdx = MFI.CreateStackObject(
2596 Flags.getByValSize(), Flags.getNonZeroByValAlign(), false);
2597 SDValue Temp =
2598 DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout()));
2599
2600 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2601 SDValue AlignNode =
2602 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2603
2604 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2605 SDValue Ops[] = {Chain, Temp, Src, SizeNode, AlignNode};
2606 ByValCopyChains.push_back(
2607 DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops));
2608 ByValTemporaries[ArgIdx] = Temp;
2609 }
2610 }
2611 if (!ByValCopyChains.empty())
2612 ByValTempChain =
2613 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
2614 }
2615
2616 // During a tail call, stores to the argument area must happen after all of
2617 // the function's incoming arguments have been loaded because they may alias.
2618 // This is done by folding in a TokenFactor from LowerFormalArguments, but
2619 // there's no point in doing so repeatedly so this tracks whether that's
2620 // happened yet.
2621 bool AfterFormalArgLoads = false;
2622
2623 // Walk the register/memloc assignments, inserting copies/loads. In the case
2624 // of tail call optimization, arguments are handled later.
2625 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2626 i != e;
2627 ++i, ++realArgIdx) {
2628 CCValAssign &VA = ArgLocs[i];
2629 SDValue Arg = OutVals[realArgIdx];
2630 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2631 bool isByVal = Flags.isByVal();
2632
2633 // Promote the value if needed.
2634 switch (VA.getLocInfo()) {
2635 default: llvm_unreachable("Unknown loc info!");
2636 case CCValAssign::Full: break;
2637 case CCValAssign::SExt:
2638 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2639 break;
2640 case CCValAssign::ZExt:
2641 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2642 break;
2643 case CCValAssign::AExt:
2644 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2645 break;
2646 case CCValAssign::BCvt:
2647 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2648 break;
2649 }
2650
2651 if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2652 Chain = DAG.getStackArgumentTokenFactor(Chain);
2653 if (ByValTempChain)
2654 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain,
2655 ByValTempChain);
2656 AfterFormalArgLoads = true;
2657 }
2658
2659 // f16 arguments have their size extended to 4 bytes and passed as if they
2660 // had been copied to the LSBs of a 32-bit register.
2661 // For that, they are passed extended to i32 (soft ABI) or to f32 (hard ABI)
2662 if (VA.needsCustom() &&
2663 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2664 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2665 } else {
2666 // f16 arguments could have been extended prior to argument lowering.
2667 // Mask them if this is a CMSE nonsecure call.
2668 auto ArgVT = Outs[realArgIdx].ArgVT;
2669 if (isCmseNSCall && (ArgVT == MVT::f16)) {
2670 auto LocBits = VA.getLocVT().getSizeInBits();
2671 auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2672 SDValue Mask =
2673 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2674 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2675 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2676 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2677 }
2678 }
2679
2680 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2681 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2682 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2683 DAG.getConstant(0, dl, MVT::i32));
2684 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2685 DAG.getConstant(1, dl, MVT::i32));
2686
2687 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2688 StackPtr, MemOpChains, isTailCall, SPDiff);
2689
2690 VA = ArgLocs[++i]; // skip ahead to next loc
2691 if (VA.isRegLoc()) {
2692 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2693 StackPtr, MemOpChains, isTailCall, SPDiff);
2694 } else {
2695 assert(VA.isMemLoc());
2696 SDValue DstAddr;
2697 MachinePointerInfo DstInfo;
2698 std::tie(DstAddr, DstInfo) =
2699 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2700 MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2701 }
2702 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2703 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2704 StackPtr, MemOpChains, isTailCall, SPDiff);
2705 } else if (VA.isRegLoc()) {
2706 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2707 Outs[0].VT == MVT::i32) {
2708 assert(VA.getLocVT() == MVT::i32 &&
2709 "unexpected calling convention register assignment");
2710 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2711 "unexpected use of 'returned'");
2712 isThisReturn = true;
2713 }
2714 const TargetOptions &Options = DAG.getTarget().Options;
2715 if (Options.EmitCallSiteInfo)
2716 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
2717 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2718 } else if (isByVal) {
2719 assert(VA.isMemLoc());
2720 unsigned offset = 0;
2721
2722 // True if this byval aggregate will be split between registers
2723 // and memory.
2724 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2725 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2726
2727 SDValue ByValSrc;
2728 bool NeedsStackCopy;
2729 if (ByValTemporaries.contains(realArgIdx)) {
2730 ByValSrc = ByValTemporaries[realArgIdx];
2731 NeedsStackCopy = true;
2732 } else {
2733 ByValSrc = Arg;
2734 NeedsStackCopy = !isTailCall;
2735 }
2736
2737 // If part of the argument is in registers, load them.
2738 if (CurByValIdx < ByValArgsCount) {
2739 unsigned RegBegin, RegEnd;
2740 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2741
2742 EVT PtrVT =
2743 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2744 unsigned int i, j;
2745 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2746 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2747 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, ByValSrc, Const);
2748 SDValue Load =
2749 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2750 DAG.InferPtrAlign(AddArg));
2751 MemOpChains.push_back(Load.getValue(1));
2752 RegsToPass.push_back(std::make_pair(j, Load));
2753 }
2754
2755 // If the parameter size exceeds the register area, the "offset" value
2756 // helps us calculate the stack slot for the remaining part properly.
2757 offset = RegEnd - RegBegin;
2758
2759 CCInfo.nextInRegsParam();
2760 }
2761
2762 // If the memory part of the argument isn't already in the correct place
2763 // (which can happen with tail calls), copy it into the argument area.
2764 if (NeedsStackCopy && Flags.getByValSize() > 4 * offset) {
2765 auto PtrVT = getPointerTy(DAG.getDataLayout());
2766 SDValue Dst;
2767 MachinePointerInfo DstInfo;
2768 std::tie(Dst, DstInfo) =
2769 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2770 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2771 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, ByValSrc, SrcOffset);
2772 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2773 MVT::i32);
2774 SDValue AlignNode =
2775 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2776
2777 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2778 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2779 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2780 Ops));
2781 }
2782 } else {
2783 assert(VA.isMemLoc());
2784 SDValue DstAddr;
2785 MachinePointerInfo DstInfo;
2786 std::tie(DstAddr, DstInfo) =
2787 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2788
2789 SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2790 MemOpChains.push_back(Store);
2791 }
2792 }
2793
2794 if (!MemOpChains.empty())
2795 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2796
2797 // Build a sequence of copy-to-reg nodes chained together with token chain
2798 // and flag operands which copy the outgoing args into the appropriate regs.
2799 SDValue InGlue;
2800 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2801 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2802 RegsToPass[i].second, InGlue);
2803 InGlue = Chain.getValue(1);
2804 }
2805
2806 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2807 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2808 // node so that legalize doesn't hack it.
2809 bool isDirect = false;
2810
2811 const TargetMachine &TM = getTargetMachine();
2812 const GlobalValue *GVal = nullptr;
2813 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2814 GVal = G->getGlobal();
2815 bool isStub = !TM.shouldAssumeDSOLocal(GVal) && Subtarget->isTargetMachO();
2816
2817 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2818 bool isLocalARMFunc = false;
2819 auto PtrVt = getPointerTy(DAG.getDataLayout());
2820
2821 if (Subtarget->genLongCalls()) {
2822 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2823 "long-calls codegen is not position independent!");
2824 // Handle a global address or an external symbol. If it's not one of
2825 // those, the target's already in a register, so we don't need to do
2826 // anything extra.
2827 if (isa<GlobalAddressSDNode>(Callee)) {
2828 if (Subtarget->genExecuteOnly()) {
2829 if (Subtarget->useMovt())
2830 ++NumMovwMovt;
2831 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2832 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2833 } else {
2834 // Create a constant pool entry for the callee address
2835 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2836 ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
2837 GVal, ARMPCLabelIndex, ARMCP::CPValue, 0);
2838
2839 // Get the address of the callee into a register
2840 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2841 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2842 Callee = DAG.getLoad(
2843 PtrVt, dl, DAG.getEntryNode(), Addr,
2844 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2845 }
2846 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2847 const char *Sym = S->getSymbol();
2848
2849 if (Subtarget->genExecuteOnly()) {
2850 if (Subtarget->useMovt())
2851 ++NumMovwMovt;
2852 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2853 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2854 } else {
2855 // Create a constant pool entry for the callee address
2856 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2857 ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
2858 *DAG.getContext(), Sym, ARMPCLabelIndex, 0);
2859
2860 // Get the address of the callee into a register
2861 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2862 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2863 Callee = DAG.getLoad(
2864 PtrVt, dl, DAG.getEntryNode(), Addr,
2865 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2866 }
2867 }
2868 } else if (isa<GlobalAddressSDNode>(Callee)) {
2869 if (!PreferIndirect) {
2870 isDirect = true;
2871 bool isDef = GVal->isStrongDefinitionForLinker();
2872
2873 // ARM call to a local ARM function is predicable.
2874 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2875 // tBX takes a register source operand.
2876 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2877 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2878 Callee = DAG.getNode(
2879 ARMISD::WrapperPIC, dl, PtrVt,
2880 DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2881 Callee = DAG.getLoad(
2882 PtrVt, dl, DAG.getEntryNode(), Callee,
2883 MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(),
2884 MachineMemOperand::MODereferenceable |
2885 MachineMemOperand::MOInvariant);
2886 } else if (Subtarget->isTargetCOFF()) {
2887 assert(Subtarget->isTargetWindows() &&
2888 "Windows is the only supported COFF target");
2889 unsigned TargetFlags = ARMII::MO_NO_FLAG;
2890 if (GVal->hasDLLImportStorageClass())
2891 TargetFlags = ARMII::MO_DLLIMPORT;
2892 else if (!TM.shouldAssumeDSOLocal(GVal))
2893 TargetFlags = ARMII::MO_COFFSTUB;
2894 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0,
2895 TargetFlags);
2896 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2897 Callee =
2898 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2899 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2900 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2901 } else {
2902 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0);
2903 }
2904 }
2905 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2906 isDirect = true;
2907 // tBX takes a register source operand.
2908 const char *Sym = S->getSymbol();
2909 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2910 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2911 ARMConstantPoolValue *CPV =
2912 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2913 ARMPCLabelIndex, 4);
2914 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2915 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2916 Callee = DAG.getLoad(
2917 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2918 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2919 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2920 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2921 } else {
2922 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2923 }
2924 }
2925
2926 if (isCmseNSCall) {
2927 assert(!isARMFunc && !isDirect &&
2928 "Cannot handle call to ARM function or direct call");
2929 if (NumBytes > 0) {
2930 DiagnosticInfoUnsupported Diag(DAG.getMachineFunction().getFunction(),
2931 "call to non-secure function would "
2932 "require passing arguments on stack",
2933 dl.getDebugLoc());
2934 DAG.getContext()->diagnose(Diag);
2935 }
2936 if (isStructRet) {
2937 DiagnosticInfoUnsupported Diag(
2938 DAG.getMachineFunction().getFunction(),
2939 "call to non-secure function would return value through pointer",
2940 dl.getDebugLoc());
2941 DAG.getContext()->diagnose(Diag);
2942 }
2943 }
2944
2945 // FIXME: handle tail calls differently.
2946 unsigned CallOpc;
2947 if (Subtarget->isThumb()) {
2948 if (GuardWithBTI)
2949 CallOpc = ARMISD::t2CALL_BTI;
2950 else if (isCmseNSCall)
2951 CallOpc = ARMISD::tSECALL;
2952 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2953 CallOpc = ARMISD::CALL_NOLINK;
2954 else
2955 CallOpc = ARMISD::CALL;
2956 } else {
2957 if (!isDirect && !Subtarget->hasV5TOps())
2958 CallOpc = ARMISD::CALL_NOLINK;
2959 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2960 // Emit regular call when code size is the priority
2961 !Subtarget->hasMinSize())
2962 // "mov lr, pc; b _foo" to avoid confusing the RSP
2963 CallOpc = ARMISD::CALL_NOLINK;
2964 else
2965 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2966 }
2967
2968 // We don't usually want to end the call-sequence here because we would tidy
2969 // the frame up *after* the call, however in the ABI-changing tail-call case
2970 // we've carefully laid out the parameters so that when sp is reset they'll be
2971 // in the correct location.
2972 if (isTailCall && !isSibCall) {
2973 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, dl);
2974 InGlue = Chain.getValue(1);
2975 }
2976
2977 std::vector<SDValue> Ops;
2978 Ops.push_back(Chain);
2979 Ops.push_back(Callee);
2980
2981 if (isTailCall) {
2982 Ops.push_back(DAG.getSignedTargetConstant(SPDiff, dl, MVT::i32));
2983 }
2984
2985 // Add argument registers to the end of the list so that they are known live
2986 // into the call.
2987 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2988 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2989 RegsToPass[i].second.getValueType()));
2990
2991 // Add a register mask operand representing the call-preserved registers.
2992 const uint32_t *Mask;
2993 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2994 if (isThisReturn) {
2995 // For 'this' returns, use the R0-preserving mask if applicable
2996 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2997 if (!Mask) {
2998 // Set isThisReturn to false if the calling convention is not one that
2999 // allows 'returned' to be modeled in this way, so LowerCallResult does
3000 // not try to pass 'this' straight through
3001 isThisReturn = false;
3002 Mask = ARI->getCallPreservedMask(MF, CallConv);
3003 }
3004 } else
3005 Mask = ARI->getCallPreservedMask(MF, CallConv);
3006
3007 assert(Mask && "Missing call preserved mask for calling convention");
3008 Ops.push_back(DAG.getRegisterMask(Mask));
3009
3010 if (InGlue.getNode())
3011 Ops.push_back(InGlue);
3012
3013 if (isTailCall) {
3014 MF.getFrameInfo().setHasTailCall();
3015 SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, MVT::Other, Ops);
3016 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
3017 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
3018 return Ret;
3019 }
3020
3021 // Returns a chain and a flag for retval copy to use.
3022 Chain = DAG.getNode(CallOpc, dl, {MVT::Other, MVT::Glue}, Ops);
3023 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
3024 InGlue = Chain.getValue(1);
3025 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
3026
3027 // If we're guaranteeing tail-calls will be honoured, the callee must
3028 // pop its own argument stack on return. But this call is *not* a tail call so
3029 // we need to undo that after it returns to restore the status-quo.
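// For example, under a guaranteed-TCO calling convention a call that pushes
// 20 bytes of arguments has the callee pop alignTo(20, 16) = 32 bytes, and the
// CALLSEQ_END below records that so the caller can restore its view of SP.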
3030 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
3031 uint64_t CalleePopBytes =
3032 canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1U;
3033
3034 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);
3035 if (!Ins.empty())
3036 InGlue = Chain.getValue(1);
3037
3038 // Handle result values, copying them out of physregs into vregs that we
3039 // return.
3040 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
3041 InVals, isThisReturn,
3042 isThisReturn ? OutVals[0] : SDValue(), isCmseNSCall);
3043}
3044
3045/// HandleByVal - Every parameter *after* a byval parameter is passed
3046/// on the stack. Remember the next parameter register to allocate,
3047 /// and then confiscate the rest of the parameter registers to ensure
3048/// this.
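// For example, a 12-byte byval that starts in r2 claims r2-r3 and spills its
// last 4 bytes to the stack; with r0-r3 then all taken, every later parameter
// is passed on the stack, as required.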
3049void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
3050 Align Alignment) const {
3051 // Byval (as with any stack) slots are always at least 4 byte aligned.
3052 Alignment = std::max(Alignment, Align(4));
3053
3054 MCRegister Reg = State->AllocateReg(GPRArgRegs);
3055 if (!Reg)
3056 return;
3057
3058 unsigned AlignInRegs = Alignment.value() / 4;
3059 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
3060 for (unsigned i = 0; i < Waste; ++i)
3061 Reg = State->AllocateReg(GPRArgRegs);
3062
3063 if (!Reg)
3064 return;
3065
3066 unsigned Excess = 4 * (ARM::R4 - Reg);
3067
3068 // Special case when NSAA != SP and the parameter size is greater than the
3069 // size of all remaining GPR registers. In that case we can't split the
3070 // parameter; we must send it to the stack. We also must set the NCRN to R4,
3071 // so we waste all remaining registers.
3072 const unsigned NSAAOffset = State->getStackSize();
3073 if (NSAAOffset != 0 && Size > Excess) {
3074 while (State->AllocateReg(GPRArgRegs))
3075 ;
3076 return;
3077 }
3078
3079 // The first register for the byval parameter is the first register that
3080 // wasn't allocated before this method call, so it would be "reg".
3081 // If the parameter is small enough to be saved in the range [reg, r4), then
3082 // the end (one past the last) register would be reg + param-size-in-regs;
3083 // otherwise the parameter is split between registers and the stack, and the
3084 // end register would be r4 in this case.
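// For example, with Reg = r1 and a 12-byte parameter (and no earlier argument
// already on the stack), ByValRegEnd is min(r1 + 3, r4) = r4, so the whole
// parameter lives in r1-r3 and its in-memory size below is truncated to zero;
// a 16-byte parameter would instead be split, with r1-r3 holding 12 bytes and
// the remaining 4 bytes left on the stack.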
3085 unsigned ByValRegBegin = Reg;
3086 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
3087 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
3088 // Note: the first register was already allocated earlier in this function,
3089 // so allocate only the remaining registers we need.
3090 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
3091 State->AllocateReg(GPRArgRegs);
3092 // A byval parameter that is split between registers and memory needs its
3093 // size truncated here.
3094 // In the case where the entire structure fits in registers, we set the
3095 // size in memory to zero.
3096 Size = std::max<int>(Size - Excess, 0);
3097}
3098
3099/// IsEligibleForTailCallOptimization - Check whether the call is eligible
3100/// for tail call optimization. Targets which want to do tail call
3101/// optimization should implement this function. Note that this function also
3102/// processes musttail calls, so when this function returns false on a valid
3103/// musttail call, a fatal backend error occurs.
3104bool ARMTargetLowering::IsEligibleForTailCallOptimization(
3105 TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
3106 SmallVectorImpl<CCValAssign> &ArgLocs, const bool isIndirect) const {
3107 CallingConv::ID CalleeCC = CLI.CallConv;
3108 SDValue Callee = CLI.Callee;
3109 bool isVarArg = CLI.IsVarArg;
3110 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3111 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3112 const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3113 const SelectionDAG &DAG = CLI.DAG;
3114 MachineFunction &MF = DAG.getMachineFunction();
3115 const Function &CallerF = MF.getFunction();
3116 CallingConv::ID CallerCC = CallerF.getCallingConv();
3117
3118 assert(Subtarget->supportsTailCall());
3119
3120 // Indirect tail-calls require a register to hold the target address. That
3121 // register must be:
3122 // * Allocatable (i.e. r0-r7 if the target is Thumb1).
3123 // * Not callee-saved, so must be one of r0-r3 or r12.
3124 // * Not used to hold an argument to the tail-called function, which might be
3125 // in r0-r3.
3126 // * Not used to hold the return address authentication code, which is in r12
3127 // if enabled.
3128 // Sometimes, no register matches all of these conditions, so we can't do a
3129 // tail-call.
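// For example, a Thumb1 indirect tail call whose arguments occupy r0-r3 has no
// remaining allocatable, non-callee-saved register left for the target address
// (r12 is not usable there), so the tail call is rejected.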
3130 if (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect) {
3131 SmallSet<MCPhysReg, 5> AddressRegisters;
3132 for (Register R : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
3133 AddressRegisters.insert(R);
3134 if (!(Subtarget->isThumb1Only() ||
3135 MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true)))
3136 AddressRegisters.insert(ARM::R12);
3137 for (const CCValAssign &AL : ArgLocs)
3138 if (AL.isRegLoc())
3139 AddressRegisters.erase(AL.getLocReg());
3140 if (AddressRegisters.empty()) {
3141 LLVM_DEBUG(dbgs() << "false (no reg to hold function pointer)\n");
3142 return false;
3143 }
3144 }
3145
3146 // Look for obvious safe cases to perform tail call optimization that do not
3147 // require ABI changes. This is what gcc calls sibcall.
3148
3149 // Exception-handling functions need a special set of instructions to indicate
3150 // a return to the hardware. Tail-calling another function would probably
3151 // break this.
3152 if (CallerF.hasFnAttribute("interrupt")) {
3153 LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n");
3154 return false;
3155 }
3156
3157 if (canGuaranteeTCO(CalleeCC,
3158 getTargetMachine().Options.GuaranteedTailCallOpt)) {
3159 LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false")
3160 << " (guaranteed tail-call CC)\n");
3161 return CalleeCC == CallerCC;
3162 }
3163
3164 // Also avoid sibcall optimization if either caller or callee uses struct
3165 // return semantics.
3166 bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
3167 bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
3168 if (isCalleeStructRet != isCallerStructRet) {
3169 LLVM_DEBUG(dbgs() << "false (struct-ret)\n");
3170 return false;
3171 }
3172
3173 // Externally-defined functions with weak linkage should not be
3174 // tail-called on ARM when the OS does not support dynamic
3175 // pre-emption of symbols, as the AAELF spec requires normal calls
3176 // to undefined weak functions to be replaced with a NOP or jump to the
3177 // next instruction. The behaviour of branch instructions in this
3178 // situation (as used for tail calls) is implementation-defined, so we
3179 // cannot rely on the linker replacing the tail call with a return.
3180 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3181 const GlobalValue *GV = G->getGlobal();
3182 const Triple &TT = getTargetMachine().getTargetTriple();
3183 if (GV->hasExternalWeakLinkage() &&
3184 (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
3185 TT.isOSBinFormatMachO())) {
3186 LLVM_DEBUG(dbgs() << "false (external weak linkage)\n");
3187 return false;
3188 }
3189 }
3190
3191 // Check that the call results are passed in the same way.
3192 LLVMContext &C = *DAG.getContext();
3193 if (!CCState::resultsCompatible(
3194 getEffectiveCallingConv(CalleeCC, isVarArg),
3195 getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
3196 CCAssignFnForReturn(CalleeCC, isVarArg),
3197 CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) {
3198 LLVM_DEBUG(dbgs() << "false (incompatible results)\n");
3199 return false;
3200 }
3201 // The callee has to preserve all registers the caller needs to preserve.
3202 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3203 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3204 if (CalleeCC != CallerCC) {
3205 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3206 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) {
3207 LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n");
3208 return false;
3209 }
3210 }
3211
3212 // If the caller's vararg arguments have been split between registers and the
3213 // stack, do not perform a tail call, since part of the argument is in the
3214 // caller's local frame.
3215 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3216 if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) {
3217 LLVM_DEBUG(dbgs() << "false (arg reg save area)\n");
3218 return false;
3219 }
3220
3221 // If the callee takes no arguments then go on to check the results of the
3222 // call.
3223 const MachineRegisterInfo &MRI = MF.getRegInfo();
3224 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) {
3225 LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n");
3226 return false;
3227 }
3228
3229 // If the stack arguments for this call do not fit into our own save area then
3230 // the call cannot be made tail.
3231 if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize())
3232 return false;
3233
3234 LLVM_DEBUG(dbgs() << "true\n");
3235 return true;
3236}
3237
3238bool
3239ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3240 MachineFunction &MF, bool isVarArg,
3241 const SmallVectorImpl<ISD::OutputArg> &Outs,
3242 LLVMContext &Context) const {
3243 SmallVector<CCValAssign, 16> RVLocs;
3244 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3245 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3246}
3247
3248 static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
3249 const SDLoc &DL, SelectionDAG &DAG) {
3250 const MachineFunction &MF = DAG.getMachineFunction();
3251 const Function &F = MF.getFunction();
3252
3253 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3254
3255 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3256 // version of the "preferred return address". These offsets affect the return
3257 // instruction if this is a return from PL1 without hypervisor extensions.
3258 // IRQ/FIQ: +4 "subs pc, lr, #4"
3259 // SWI: 0 "subs pc, lr, #0"
3260 // ABORT: +4 "subs pc, lr, #4"
3261 // UNDEF: +4/+2 "subs pc, lr, #0"
3262 // UNDEF varies depending on where the exception came from ARM or Thumb
3263 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
3264
3265 int64_t LROffset;
3266 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3267 IntKind == "ABORT")
3268 LROffset = 4;
3269 else if (IntKind == "SWI" || IntKind == "UNDEF")
3270 LROffset = 0;
3271 else
3272 report_fatal_error("Unsupported interrupt attribute. If present, value "
3273 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3274
3275 RetOps.insert(RetOps.begin() + 1,
3276 DAG.getConstant(LROffset, DL, MVT::i32, false));
3277
3278 return DAG.getNode(ARMISD::INTRET_GLUE, DL, MVT::Other, RetOps);
3279}
3280
3281SDValue
3282ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3283 bool isVarArg,
3284 const SmallVectorImpl<ISD::OutputArg> &Outs,
3285 const SmallVectorImpl<SDValue> &OutVals,
3286 const SDLoc &dl, SelectionDAG &DAG) const {
3287 // CCValAssign - represent the assignment of the return value to a location.
3288 SmallVector<CCValAssign, 16> RVLocs;
3289
3290 // CCState - Info about the registers and stack slots.
3291 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3292 *DAG.getContext());
3293
3294 // Analyze outgoing return values.
3295 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3296
3297 SDValue Glue;
3298 SmallVector<SDValue, 4> RetOps;
3299 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3300 bool isLittleEndian = Subtarget->isLittle();
3301
3302 MachineFunction &MF = DAG.getMachineFunction();
3303 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3304 AFI->setReturnRegsCount(RVLocs.size());
3305
3306 // Report error if cmse entry function returns structure through first ptr arg.
3307 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3308 // Note: using an empty SDLoc(), as the first line of the function is a
3309 // better place to report than the last line.
3310 DiagnosticInfoUnsupported Diag(
3311 DAG.getMachineFunction().getFunction(),
3312 "secure entry function would return value through pointer",
3313 SDLoc().getDebugLoc());
3314 DAG.getContext()->diagnose(Diag);
3315 }
3316
3317 // Copy the result values into the output registers.
3318 for (unsigned i = 0, realRVLocIdx = 0;
3319 i != RVLocs.size();
3320 ++i, ++realRVLocIdx) {
3321 CCValAssign &VA = RVLocs[i];
3322 assert(VA.isRegLoc() && "Can only return in registers!");
3323
3324 SDValue Arg = OutVals[realRVLocIdx];
3325 bool ReturnF16 = false;
3326
3327 if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3328 // Half-precision return values can be returned like this:
3329 //
3330 // t11 f16 = fadd ...
3331 // t12: i16 = bitcast t11
3332 // t13: i32 = zero_extend t12
3333 // t14: f32 = bitcast t13 <~~~~~~~ Arg
3334 //
3335 // to avoid code generation for bitcasts, we simply set Arg to the node
3336 // that produces the f16 value, t11 in this case.
3337 //
3338 if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3339 SDValue ZE = Arg.getOperand(0);
3340 if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3341 SDValue BC = ZE.getOperand(0);
3342 if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3343 Arg = BC.getOperand(0);
3344 ReturnF16 = true;
3345 }
3346 }
3347 }
3348 }
3349
3350 switch (VA.getLocInfo()) {
3351 default: llvm_unreachable("Unknown loc info!");
3352 case CCValAssign::Full: break;
3353 case CCValAssign::BCvt:
3354 if (!ReturnF16)
3355 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3356 break;
3357 }
3358
3359 // Mask f16 arguments if this is a CMSE nonsecure entry.
3360 auto RetVT = Outs[realRVLocIdx].ArgVT;
3361 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3362 if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3363 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3364 } else {
3365 auto LocBits = VA.getLocVT().getSizeInBits();
3366 auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3367 SDValue Mask =
3368 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3369 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3370 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3371 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3372 }
3373 }
3374
3375 if (VA.needsCustom() &&
3376 (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3377 if (VA.getLocVT() == MVT::v2f64) {
3378 // Extract the first half and return it in two registers.
3379 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3380 DAG.getConstant(0, dl, MVT::i32));
3381 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3382 DAG.getVTList(MVT::i32, MVT::i32), Half);
3383
3384 Chain =
3385 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3386 HalfGPRs.getValue(isLittleEndian ? 0 : 1), Glue);
3387 Glue = Chain.getValue(1);
3388 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3389 VA = RVLocs[++i]; // skip ahead to next loc
3390 Chain =
3391 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3392 HalfGPRs.getValue(isLittleEndian ? 1 : 0), Glue);
3393 Glue = Chain.getValue(1);
3394 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3395 VA = RVLocs[++i]; // skip ahead to next loc
3396
3397 // Extract the 2nd half and fall through to handle it as an f64 value.
3398 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3399 DAG.getConstant(1, dl, MVT::i32));
3400 }
3401 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3402 // available.
3403 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3404 DAG.getVTList(MVT::i32, MVT::i32), Arg);
3405 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3406 fmrrd.getValue(isLittleEndian ? 0 : 1), Glue);
3407 Glue = Chain.getValue(1);
3408 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3409 VA = RVLocs[++i]; // skip ahead to next loc
3410 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3411 fmrrd.getValue(isLittleEndian ? 1 : 0), Glue);
3412 } else
3413 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
3414
3415 // Guarantee that all emitted copies are
3416 // stuck together, avoiding something bad.
3417 Glue = Chain.getValue(1);
3418 RetOps.push_back(DAG.getRegister(
3419 VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3420 }
3421 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3422 const MCPhysReg *I =
3423 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3424 if (I) {
3425 for (; *I; ++I) {
3426 if (ARM::GPRRegClass.contains(*I))
3427 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3428 else if (ARM::DPRRegClass.contains(*I))
3429 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3430 else
3431 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3432 }
3433 }
3434
3435 // Update chain and glue.
3436 RetOps[0] = Chain;
3437 if (Glue.getNode())
3438 RetOps.push_back(Glue);
3439
3440 // CPUs which aren't M-class use a special sequence to return from
3441 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3442 // though we use "subs pc, lr, #N").
3443 //
3444 // M-class CPUs actually use a normal return sequence with a special
3445 // (hardware-provided) value in LR, so the normal code path works.
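// For example, an IRQ handler on an A-class core returns with "subs pc, lr, #4",
// whereas an M-class core uses an ordinary "bx lr" with the hardware-provided
// EXC_RETURN value in lr.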
3446 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3447 !Subtarget->isMClass()) {
3448 if (Subtarget->isThumb1Only())
3449 report_fatal_error("interrupt attribute is not supported in Thumb1");
3450 return LowerInterruptReturn(RetOps, dl, DAG);
3451 }
3452
3453 ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE :
3454 ARMISD::RET_GLUE;
3455 return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3456}
3457
3458bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3459 if (N->getNumValues() != 1)
3460 return false;
3461 if (!N->hasNUsesOfValue(1, 0))
3462 return false;
3463
3464 SDValue TCChain = Chain;
3465 SDNode *Copy = *N->user_begin();
3466 if (Copy->getOpcode() == ISD::CopyToReg) {
3467 // If the copy has a glue operand, we conservatively assume it isn't safe to
3468 // perform a tail call.
3469 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3470 return false;
3471 TCChain = Copy->getOperand(0);
3472 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3473 SDNode *VMov = Copy;
3474 // f64 returned in a pair of GPRs.
3475 SmallPtrSet<SDNode*, 2> Copies;
3476 for (SDNode *U : VMov->users()) {
3477 if (U->getOpcode() != ISD::CopyToReg)
3478 return false;
3479 Copies.insert(U);
3480 }
3481 if (Copies.size() > 2)
3482 return false;
3483
3484 for (SDNode *U : VMov->users()) {
3485 SDValue UseChain = U->getOperand(0);
3486 if (Copies.count(UseChain.getNode()))
3487 // Second CopyToReg
3488 Copy = U;
3489 else {
3490 // We are at the top of this chain.
3491 // If the copy has a glue operand, we conservatively assume it
3492 // isn't safe to perform a tail call.
3493 if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3494 return false;
3495 // First CopyToReg
3496 TCChain = UseChain;
3497 }
3498 }
3499 } else if (Copy->getOpcode() == ISD::BITCAST) {
3500 // f32 returned in a single GPR.
3501 if (!Copy->hasOneUse())
3502 return false;
3503 Copy = *Copy->user_begin();
3504 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3505 return false;
3506 // If the copy has a glue operand, we conservatively assume it isn't safe to
3507 // perform a tail call.
3508 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3509 return false;
3510 TCChain = Copy->getOperand(0);
3511 } else {
3512 return false;
3513 }
3514
3515 bool HasRet = false;
3516 for (const SDNode *U : Copy->users()) {
3517 if (U->getOpcode() != ARMISD::RET_GLUE &&
3518 U->getOpcode() != ARMISD::INTRET_GLUE)
3519 return false;
3520 HasRet = true;
3521 }
3522
3523 if (!HasRet)
3524 return false;
3525
3526 Chain = TCChain;
3527 return true;
3528}
3529
3530bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3531 if (!Subtarget->supportsTailCall())
3532 return false;
3533
3534 if (!CI->isTailCall())
3535 return false;
3536
3537 return true;
3538}
3539
3540 // To write a 64-bit value we need to split it into two 32-bit values first,
3541 // and then pass the low and high parts through.
3542 static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
3543 SDLoc DL(Op);
3544 SDValue WriteValue = Op->getOperand(2);
3545
3546 // This function is only supposed to be called for i64 type argument.
3547 assert(WriteValue.getValueType() == MVT::i64
3548 && "LowerWRITE_REGISTER called for non-i64 type argument.");
3549
3550 SDValue Lo, Hi;
3551 std::tie(Lo, Hi) = DAG.SplitScalar(WriteValue, DL, MVT::i32, MVT::i32);
3552 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3553 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3554}
3555
3556// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3557// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3558// one of the above mentioned nodes. It has to be wrapped because otherwise
3559// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3560 // be used to form an addressing mode. These wrapped nodes will be selected
3561// into MOVi.
3562SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3563 SelectionDAG &DAG) const {
3564 EVT PtrVT = Op.getValueType();
3565 // FIXME there is no actual debug info here
3566 SDLoc dl(Op);
3567 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3568 SDValue Res;
3569
3570 // When generating execute-only code Constant Pools must be promoted to the
3571 // global data section. It's a bit ugly that we can't share them across basic
3572 // blocks, but this way we guarantee that execute-only behaves correctly with
3573 // position-independent addressing modes.
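// For example, a floating-point literal that would normally become a
// per-function constant-pool entry is instead promoted to an internal global
// variable below and addressed like any other global.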
3574 if (Subtarget->genExecuteOnly()) {
3575 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3576 auto T = const_cast<Type*>(CP->getType());
3577 auto C = const_cast<Constant*>(CP->getConstVal());
3578 auto M = const_cast<Module*>(DAG.getMachineFunction().
3579 getFunction().getParent());
3580 auto GV = new GlobalVariable(
3581 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3582 Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
3583 Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
3584 Twine(AFI->createPICLabelUId())
3585 );
3586 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3587 dl, PtrVT);
3588 return LowerGlobalAddress(GA, DAG);
3589 }
3590
3591 // The 16-bit ADR instruction can only encode offsets that are multiples of 4,
3592 // so we need to align to at least 4 bytes when we don't have 32-bit ADR.
3593 Align CPAlign = CP->getAlign();
3594 if (Subtarget->isThumb1Only())
3595 CPAlign = std::max(CPAlign, Align(4));
3596 if (CP->isMachineConstantPoolEntry())
3597 Res =
3598 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);
3599 else
3600 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);
3601 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3602}
3603
3604 unsigned ARMTargetLowering::getJumpTableEncoding() const {
3605 // If we don't have a 32-bit pc-relative branch instruction then the jump
3606 // table consists of block addresses. Usually this is inline, but for
3607 // execute-only it must be placed out-of-line.
3608 if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())
3609 return MachineJumpTableInfo::EK_BlockAddress;
3610 return MachineJumpTableInfo::EK_Inline;
3611}
3612
3613SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3614 SelectionDAG &DAG) const {
3615 MachineFunction &MF = DAG.getMachineFunction();
3616 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3617 unsigned ARMPCLabelIndex = 0;
3618 SDLoc DL(Op);
3619 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3620 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3621 SDValue CPAddr;
3622 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3623 if (!IsPositionIndependent) {
3624 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3625 } else {
3626 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3627 ARMPCLabelIndex = AFI->createPICLabelUId();
3628 ARMConstantPoolValue *CPV =
3629 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3630 ARMCP::CPBlockAddress, PCAdj);
3631 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3632 }
3633 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3634 SDValue Result = DAG.getLoad(
3635 PtrVT, DL, DAG.getEntryNode(), CPAddr,
3636 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3637 if (!IsPositionIndependent)
3638 return Result;
3639 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3640 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3641}
3642
3643/// Convert a TLS address reference into the correct sequence of loads
3644/// and calls to compute the variable's address for Darwin, and return an
3645/// SDValue containing the final node.
3646
3647/// Darwin only has one TLS scheme which must be capable of dealing wit