ARMISelLowering.cpp
1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that ARM uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMISelLowering.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
20#include "ARMPerfectShuffle.h"
21#include "ARMRegisterInfo.h"
22#include "ARMSelectionDAGInfo.h"
23#include "ARMSubtarget.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/Statistic.h"
38#include "llvm/ADT/StringRef.h"
40#include "llvm/ADT/Twine.h"
67#include "llvm/IR/Attributes.h"
68#include "llvm/IR/CallingConv.h"
69#include "llvm/IR/Constant.h"
70#include "llvm/IR/Constants.h"
71#include "llvm/IR/DataLayout.h"
72#include "llvm/IR/DebugLoc.h"
74#include "llvm/IR/Function.h"
75#include "llvm/IR/GlobalAlias.h"
76#include "llvm/IR/GlobalValue.h"
78#include "llvm/IR/IRBuilder.h"
79#include "llvm/IR/InlineAsm.h"
80#include "llvm/IR/Instruction.h"
83#include "llvm/IR/Intrinsics.h"
84#include "llvm/IR/IntrinsicsARM.h"
85#include "llvm/IR/Module.h"
87#include "llvm/IR/Type.h"
88#include "llvm/IR/User.h"
89#include "llvm/IR/Value.h"
90#include "llvm/MC/MCInstrDesc.h"
93#include "llvm/MC/MCSchedule.h"
100#include "llvm/Support/Debug.h"
108#include <algorithm>
109#include <cassert>
110#include <cstdint>
111#include <cstdlib>
112#include <iterator>
113#include <limits>
114#include <optional>
115#include <tuple>
116#include <utility>
117#include <vector>
118
119using namespace llvm;
120using namespace llvm::PatternMatch;
121
122#define DEBUG_TYPE "arm-isel"
123
124STATISTIC(NumTailCalls, "Number of tail calls");
125STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
126STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
127STATISTIC(NumConstpoolPromoted,
128 "Number of constants with their storage promoted into constant pools");
129
130static cl::opt<bool>
131ARMInterworking("arm-interworking", cl::Hidden,
132 cl::desc("Enable / disable ARM interworking (for debugging only)"),
133 cl::init(true));
134
136 "arm-promote-constant", cl::Hidden,
137 cl::desc("Enable / disable promotion of unnamed_addr constants into "
138 "constant pools"),
139 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
141 "arm-promote-constant-max-size", cl::Hidden,
142 cl::desc("Maximum size of constant to promote into a constant pool"),
143 cl::init(64));
145 "arm-promote-constant-max-total", cl::Hidden,
146 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
147 cl::init(128));
148
150MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
151 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
152 cl::init(2));
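
// These are ordinary cl::opt flags, so they can be toggled on the backend
// command line when experimenting. Illustrative invocation (also reachable
// from clang via -mllvm):
//   llc -mtriple=armv7a-none-eabi -arm-promote-constant=true \
//       -arm-promote-constant-max-size=32 input.ll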
153
154// The APCS parameter registers.
155static const MCPhysReg GPRArgRegs[] = {
156 ARM::R0, ARM::R1, ARM::R2, ARM::R3
157};
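
// Illustrative example (AAPCS): for a call such as
//   void f(int a, long long b, int c);
// 'a' is assigned r0, 'b' needs 8-byte alignment and takes the even/odd pair
// r2:r3, and 'c' overflows to the stack. Under the older APCS there is no
// pair alignment, so 'b' would occupy r1:r2 and 'c' would still fit in r3.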
158
159void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
160 if (VT != PromotedLdStVT) {
162 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
163
165 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
166 }
167
168 MVT ElemTy = VT.getVectorElementType();
169 if (ElemTy != MVT::f64)
173 if (ElemTy == MVT::i32) {
178 } else {
183 }
192 if (VT.isInteger()) {
196 }
197
198 // Neon does not support vector divide/remainder operations.
207
208 if (!VT.isFloatingPoint() &&
209 VT != MVT::v2i64 && VT != MVT::v1i64)
210 for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
211 setOperationAction(Opcode, VT, Legal);
212 if (!VT.isFloatingPoint())
213 for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
214 setOperationAction(Opcode, VT, Legal);
215}
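
// Illustrative example: addDRTypeForNEON(MVT::v8i8) below promotes v8i8 LOAD
// and STORE to f64, so a plain 64-bit vector load can be selected as a single
// D-register load instead of being scalarized.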
216
217void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
218 addRegisterClass(VT, &ARM::DPRRegClass);
219 addTypeForNEON(VT, MVT::f64);
220}
221
222void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
223 addRegisterClass(VT, &ARM::DPairRegClass);
224 addTypeForNEON(VT, MVT::v2f64);
225}
226
227void ARMTargetLowering::setAllExpand(MVT VT) {
228 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
229 setOperationAction(Opc, VT, Expand);
230
231 // We support these really simple operations even on types where all
232 // the actual arithmetic has to be broken down into simpler
233 // operations or turned into library calls.
238}
239
240void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
241 LegalizeAction Action) {
242 setLoadExtAction(ISD::EXTLOAD, From, To, Action);
243 setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
244 setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
245}
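
// Illustrative example: when an extending load from v8i8 to v8i16 is marked
// Legal (as it is for MVE below), IR such as
//   %v = load <8 x i8>, ptr %p
//   %w = zext <8 x i8> %v to <8 x i16>
// matches a single widening vector load instead of a load followed by a
// separate extend.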
246
247void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
248 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
249
250 for (auto VT : IntTypes) {
251 addRegisterClass(VT, &ARM::MQPRRegClass);
281
282 // No native support for these.
292
293 // Vector reductions
303
304 if (!HasMVEFP) {
309 } else {
312 }
313
314 // Pre and Post inc are supported on loads and stores
315 for (unsigned im = (unsigned)ISD::PRE_INC;
321 }
322 }
323
324 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
325 for (auto VT : FloatTypes) {
326 addRegisterClass(VT, &ARM::MQPRRegClass);
327 if (!HasMVEFP)
328 setAllExpand(VT);
329
330 // These are legal or custom whether we have MVE.fp or not
343
344 // Pre and Post inc are supported on loads and stores
345 for (unsigned im = (unsigned)ISD::PRE_INC;
351 }
352
353 if (HasMVEFP) {
361
362 // No native support for these.
376 }
377 }
378
379 // Custom Expand smaller than legal vector reductions to prevent false zero
380 // items being added.
389
390 // We 'support' these types up to bitcast/load/store level, regardless of
391 // MVE integer-only / float support. Only doing FP data processing on the FP
392 // vector types is inhibited at integer-only level.
393 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
394 for (auto VT : LongTypes) {
395 addRegisterClass(VT, &ARM::MQPRRegClass);
396 setAllExpand(VT);
402 }
404
405 // We can do bitwise operations on v2i64 vectors
406 setOperationAction(ISD::AND, MVT::v2i64, Legal);
407 setOperationAction(ISD::OR, MVT::v2i64, Legal);
408 setOperationAction(ISD::XOR, MVT::v2i64, Legal);
409
410 // It is legal to extload from v4i8 to v4i16 or v4i32.
411 addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
412 addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
413 addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
414
415 // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
421
422 // Some truncating stores are legal too.
423 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
424 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
425 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
426
427 // Pre and Post inc on these are legal, given the correct extends
428 for (unsigned im = (unsigned)ISD::PRE_INC;
430 for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
435 }
436 }
437
438 // Predicate types
439 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
440 for (auto VT : pTypes) {
441 addRegisterClass(VT, &ARM::VCCRRegClass);
456
457 if (!HasMVEFP) {
462 }
463 }
467 setOperationAction(ISD::OR, MVT::v2i1, Expand);
473
482}
483
485 const ARMSubtarget &STI)
486 : TargetLowering(TM), Subtarget(&STI) {
487 RegInfo = Subtarget->getRegisterInfo();
488 Itins = Subtarget->getInstrItineraryData();
489
492
493 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
494 !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
495 bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
496 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
497 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
498 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
500 }
501
502 if (Subtarget->isTargetMachO()) {
503 // Uses VFP for Thumb libfuncs if available.
504 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
505 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
506 static const struct {
507 const RTLIB::Libcall Op;
508 const char * const Name;
509 const ISD::CondCode Cond;
510 } LibraryCalls[] = {
511 // Single-precision floating-point arithmetic.
512 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
513 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
514 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
515 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
516
517 // Double-precision floating-point arithmetic.
518 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
519 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
520 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
521 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
522
523 // Single-precision comparisons.
524 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
525 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
526 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
527 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
528 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
529 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
530 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
531
532 // Double-precision comparisons.
533 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
534 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
535 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
536 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
537 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
538 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
539 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
540
541 // Floating-point to integer conversions.
542 // i64 conversions are done via library routines even when generating VFP
543 // instructions, so use the same ones.
544 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
545 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
546 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
547 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
548
549 // Conversions between floating types.
550 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
551 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
552
553 // Integer to floating-point conversions.
554 // i64 conversions are done via library routines even when generating VFP
555 // instructions, so use the same ones.
556 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
557 // e.g., __floatunsidf vs. __floatunssidfvfp.
558 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
559 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
560 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
561 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
562 };
563
564 for (const auto &LC : LibraryCalls) {
565 setLibcallName(LC.Op, LC.Name);
566 if (LC.Cond != ISD::SETCC_INVALID)
567 setCmpLibcallCC(LC.Op, LC.Cond);
568 }
569 }
570 }
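
// Note on the Cond field above: for the comparison libcalls it records how
// the integer return value is turned back into a boolean. For example,
// __eqsf2vfp returns nonzero when its operands compare equal, so the OEQ_F32
// result is recovered by testing the call's return value with SETNE against
// zero.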
571
572 // These libcalls are not available in 32-bit.
573 setLibcallName(RTLIB::SHL_I128, nullptr);
574 setLibcallName(RTLIB::SRL_I128, nullptr);
575 setLibcallName(RTLIB::SRA_I128, nullptr);
576 setLibcallName(RTLIB::MUL_I128, nullptr);
577 setLibcallName(RTLIB::MULO_I64, nullptr);
578 setLibcallName(RTLIB::MULO_I128, nullptr);
579
580 // RTLIB
581 if (Subtarget->isAAPCS_ABI() &&
582 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
583 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
584 static const struct {
585 const RTLIB::Libcall Op;
586 const char * const Name;
587 const CallingConv::ID CC;
588 const ISD::CondCode Cond;
589 } LibraryCalls[] = {
590 // Double-precision floating-point arithmetic helper functions
591 // RTABI chapter 4.1.2, Table 2
592 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
593 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
594 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
595 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
596
597 // Double-precision floating-point comparison helper functions
598 // RTABI chapter 4.1.2, Table 3
599 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
600 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
601 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
602 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
603 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
604 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
605 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
606
607 // Single-precision floating-point arithmetic helper functions
608 // RTABI chapter 4.1.2, Table 4
609 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
610 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
611 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
612 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
613
614 // Single-precision floating-point comparison helper functions
615 // RTABI chapter 4.1.2, Table 5
616 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
617 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
618 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
619 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
620 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
621 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
622 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
623
624 // Floating-point to integer conversions.
625 // RTABI chapter 4.1.2, Table 6
626 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
627 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
628 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
629 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
630 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
631 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
634
635 // Conversions between floating types.
636 // RTABI chapter 4.1.2, Table 7
637 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
639 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
640
641 // Integer to floating-point conversions.
642 // RTABI chapter 4.1.2, Table 8
643 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
644 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
645 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
646 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
647 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
648 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
649 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
650 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
651
652 // Long long helper functions
653 // RTABI chapter 4.2, Table 9
654 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
655 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
656 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
657 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
658
659 // Integer division functions
660 // RTABI chapter 4.3.1
661 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
662 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
663 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
664 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
665 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
666 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
667 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
668 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
669 };
670
671 for (const auto &LC : LibraryCalls) {
672 setLibcallName(LC.Op, LC.Name);
673 setLibcallCallingConv(LC.Op, LC.CC);
674 if (LC.Cond != ISD::SETCC_INVALID)
675 setCmpLibcallCC(LC.Op, LC.Cond);
676 }
677
678 // EABI dependent RTLIB
679 if (TM.Options.EABIVersion == EABI::EABI4 ||
680 TM.Options.EABIVersion == EABI::EABI5) {
681 static const struct {
682 const RTLIB::Libcall Op;
683 const char *const Name;
684 const CallingConv::ID CC;
685 const ISD::CondCode Cond;
686 } MemOpsLibraryCalls[] = {
687 // Memory operations
688 // RTABI chapter 4.3.4
689 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
690 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
691 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
692 };
693
694 for (const auto &LC : MemOpsLibraryCalls) {
695 setLibcallName(LC.Op, LC.Name);
696 setLibcallCallingConv(LC.Op, LC.CC);
697 if (LC.Cond != ISD::SETCC_INVALID)
698 setCmpLibcallCC(LC.Op, LC.Cond);
699 }
700 }
701 }
702
703 if (Subtarget->isTargetWindows()) {
704 static const struct {
705 const RTLIB::Libcall Op;
706 const char * const Name;
707 const CallingConv::ID CC;
708 } LibraryCalls[] = {
709 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
710 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
711 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
712 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
713 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
714 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
715 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
716 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
717 };
718
719 for (const auto &LC : LibraryCalls) {
720 setLibcallName(LC.Op, LC.Name);
721 setLibcallCallingConv(LC.Op, LC.CC);
722 }
723 }
724
725 // Use divmod compiler-rt calls for iOS 5.0 and later.
726 if (Subtarget->isTargetMachO() &&
727 !(Subtarget->isTargetIOS() &&
728 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
729 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
730 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
731 }
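
// Illustrative sketch: compiler-rt's combined routines return the quotient
// and store the remainder through a pointer, roughly
//   int __divmodsi4(int a, int b, int *rem);
// so an SDIVREM node becomes one call instead of separate __divsi3 and
// __modsi3 calls.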
732
733 // The half <-> float conversion functions are always soft-float on
734 // non-watchos platforms, but are needed for some targets which use a
735 // hard-float calling convention by default.
736 if (!Subtarget->isTargetWatchABI()) {
737 if (Subtarget->isAAPCS_ABI()) {
738 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
739 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
740 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
741 } else {
742 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
743 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
744 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
745 }
746 }
747
748 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
749 // a __gnu_ prefix (which is the default).
750 if (Subtarget->isTargetAEABI()) {
751 static const struct {
752 const RTLIB::Libcall Op;
753 const char * const Name;
754 const CallingConv::ID CC;
755 } LibraryCalls[] = {
756 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
757 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
758 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
759 };
760
761 for (const auto &LC : LibraryCalls) {
762 setLibcallName(LC.Op, LC.Name);
763 setLibcallCallingConv(LC.Op, LC.CC);
764 }
765 }
766
767 if (Subtarget->isThumb1Only())
768 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
769 else
770 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
771
772 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
773 Subtarget->hasFPRegs()) {
774 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
775 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
776
781
782 if (!Subtarget->hasVFP2Base())
783 setAllExpand(MVT::f32);
784 if (!Subtarget->hasFP64())
785 setAllExpand(MVT::f64);
786 }
787
788 if (Subtarget->hasFullFP16()) {
789 addRegisterClass(MVT::f16, &ARM::HPRRegClass);
792
795 }
796
797 if (Subtarget->hasBF16()) {
798 addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
799 setAllExpand(MVT::bf16);
800 if (!Subtarget->hasFullFP16())
802 }
803
805 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
806 setTruncStoreAction(VT, InnerVT, Expand);
807 addAllExtLoads(VT, InnerVT, Expand);
808 }
809
812
814 }
815
818
821
822 if (Subtarget->hasMVEIntegerOps())
823 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
824
825 // Combine low-overhead loop intrinsics so that we can lower i1 types.
826 if (Subtarget->hasLOB()) {
828 }
829
830 if (Subtarget->hasNEON()) {
831 addDRTypeForNEON(MVT::v2f32);
832 addDRTypeForNEON(MVT::v8i8);
833 addDRTypeForNEON(MVT::v4i16);
834 addDRTypeForNEON(MVT::v2i32);
835 addDRTypeForNEON(MVT::v1i64);
836
837 addQRTypeForNEON(MVT::v4f32);
838 addQRTypeForNEON(MVT::v2f64);
839 addQRTypeForNEON(MVT::v16i8);
840 addQRTypeForNEON(MVT::v8i16);
841 addQRTypeForNEON(MVT::v4i32);
842 addQRTypeForNEON(MVT::v2i64);
843
844 if (Subtarget->hasFullFP16()) {
845 addQRTypeForNEON(MVT::v8f16);
846 addDRTypeForNEON(MVT::v4f16);
847 }
848
849 if (Subtarget->hasBF16()) {
850 addQRTypeForNEON(MVT::v8bf16);
851 addDRTypeForNEON(MVT::v4bf16);
852 }
853 }
854
855 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
856 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
857 // none of Neon, MVE or VFP supports any arithmetic operations on it.
858 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
859 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
860 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
861 // FIXME: Code duplication: FDIV and FREM are expanded always, see
862 // ARMTargetLowering::addTypeForNEON method for details.
863 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
864 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
865 // FIXME: Create unittest.
866 // In other words, find a case where "copysign" appears in the DAG with vector
867 // operands.
869 // FIXME: Code duplication: SETCC has custom operation action, see
870 // ARMTargetLowering::addTypeForNEON method for details.
872 // FIXME: Create unittest for FNEG and for FABS.
873 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
874 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
876 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
877 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
878 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
879 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
882 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
885 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
891 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
892 }
893
894 if (Subtarget->hasNEON()) {
895 // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
896 // supported for v4f32.
898 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
899 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
900 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
901 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
904 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
912
913 // Mark v2f32 intrinsics.
915 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
916 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
917 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
918 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
921 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
929
930 // Neon does not support some operations on v1i64 and v2i64 types.
931 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
932 // Custom handling for some quad-vector types to detect VMULL.
933 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
934 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
935 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
936 // Custom handling for some vector types to avoid expensive expansions
937 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
939 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
941 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
942 // a destination type that is wider than the source, nor does
943 // it have a FP_TO_[SU]INT instruction with a narrower destination than
944 // source.
953
956
957 // NEON does not have single instruction CTPOP for vectors with element
958 // types wider than 8-bits. However, custom lowering can leverage the
959 // v8i8/v16i8 vcnt instruction.
966
967 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
968 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
969
970 // NEON does not have single instruction CTTZ for vectors.
972 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
973 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
974 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
975
976 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
977 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
978 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
979 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
980
985
990
994 }
995
996 // NEON only has FMA instructions as of VFP4.
997 if (!Subtarget->hasVFP4Base()) {
998 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
999 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
1000 }
1001
1004
1005 // It is legal to extload from v4i8 to v4i16 or v4i32.
1006 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
1007 MVT::v2i32}) {
1012 }
1013 }
1014
1015 for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1016 MVT::v4i32}) {
1021 }
1022 }
1023
1024 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1031 }
1032 if (Subtarget->hasMVEIntegerOps()) {
1035 ISD::SETCC});
1036 }
1037 if (Subtarget->hasMVEFloatOps()) {
1039 }
1040
1041 if (!Subtarget->hasFP64()) {
1042 // When targeting a floating-point unit with only single-precision
1043 // operations, f64 is legal for the few double-precision instructions which
1044 // are present. However, no double-precision operations other than moves,
1045 // loads and stores are provided by the hardware.
1083 }
1084
1085 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1088 if (Subtarget->hasFullFP16()) {
1091 }
1092 }
1093
1094 if (!Subtarget->hasFP16()) {
1097 }
1098
1100
1101 // ARM does not have floating-point extending loads.
1102 for (MVT VT : MVT::fp_valuetypes()) {
1103 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1104 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
1105 }
1106
1107 // ... or truncating stores
1108 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1109 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
1110 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1111
1112 // ARM does not have an i1 sign-extending load.
1113 for (MVT VT : MVT::integer_valuetypes())
1114 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
1115
1116 // ARM supports all 4 flavors of integer indexed load / store.
1117 if (!Subtarget->isThumb1Only()) {
1118 for (unsigned im = (unsigned)ISD::PRE_INC;
1120 setIndexedLoadAction(im, MVT::i1, Legal);
1121 setIndexedLoadAction(im, MVT::i8, Legal);
1122 setIndexedLoadAction(im, MVT::i16, Legal);
1123 setIndexedLoadAction(im, MVT::i32, Legal);
1124 setIndexedStoreAction(im, MVT::i1, Legal);
1125 setIndexedStoreAction(im, MVT::i8, Legal);
1126 setIndexedStoreAction(im, MVT::i16, Legal);
1127 setIndexedStoreAction(im, MVT::i32, Legal);
1128 }
1129 } else {
1130 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1133 }
1134
1139
1142 if (Subtarget->hasDSP()) {
1151 }
1152 if (Subtarget->hasBaseDSP()) {
1155 }
1156
1157 // i64 operation support.
1160 if (Subtarget->isThumb1Only()) {
1163 }
1164 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1165 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1167
1177
1178 // MVE lowers 64 bit shifts to lsll and lsrl
1179 // assuming that ISD::SRL and SRA of i64 are already marked custom
1180 if (Subtarget->hasMVEIntegerOps())
1182
1183 // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1184 if (Subtarget->isThumb1Only()) {
1188 }
1189
1190 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1192
1193 // ARM does not have ROTL.
1198 }
1201 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1204 }
1205
1206 // @llvm.readcyclecounter requires the Performance Monitors extension.
1207 // Default to the 0 expansion on unsupported platforms.
1208 // FIXME: Technically there are older ARM CPUs that have
1209 // implementation-specific ways of obtaining this information.
1210 if (Subtarget->hasPerfMon())
1212
1213 // Only ARMv6 has BSWAP.
1214 if (!Subtarget->hasV6Ops())
1216
1217 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1218 : Subtarget->hasDivideInARMMode();
1219 if (!hasDivide) {
1220 // These are expanded into libcalls if the CPU doesn't have a hardware divider.
1223 }
1224
1225 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1228
1231 }
1232
1235
1236 // Register based DivRem for AEABI (RTABI 4.2)
1237 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1238 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1239 Subtarget->isTargetWindows()) {
1242 HasStandaloneRem = false;
1243
1244 if (Subtarget->isTargetWindows()) {
1245 const struct {
1246 const RTLIB::Libcall Op;
1247 const char * const Name;
1248 const CallingConv::ID CC;
1249 } LibraryCalls[] = {
1250 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1251 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1252 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1253 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1254
1255 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1256 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1257 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1258 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1259 };
1260
1261 for (const auto &LC : LibraryCalls) {
1262 setLibcallName(LC.Op, LC.Name);
1263 setLibcallCallingConv(LC.Op, LC.CC);
1264 }
1265 } else {
1266 const struct {
1267 const RTLIB::Libcall Op;
1268 const char * const Name;
1269 const CallingConv::ID CC;
1270 } LibraryCalls[] = {
1271 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1272 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1273 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1274 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1275
1276 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1277 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1278 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1279 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1280 };
1281
1282 for (const auto &LC : LibraryCalls) {
1283 setLibcallName(LC.Op, LC.Name);
1284 setLibcallCallingConv(LC.Op, LC.CC);
1285 }
1286 }
1287
1292 } else {
1295 }
1296
1297 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
1298 // MSVCRT doesn't have powi; fall back to pow
1299 setLibcallName(RTLIB::POWI_F32, nullptr);
1300 setLibcallName(RTLIB::POWI_F64, nullptr);
1301 }
1302
1307
1308 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1310
1311 // Use the default implementation.
1313 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1315 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1318
1319 if (Subtarget->isTargetWindows())
1321 else
1323
1324 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1325 // the default expansion.
1326 InsertFencesForAtomic = false;
1327 if (Subtarget->hasAnyDataBarrier() &&
1328 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1329 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1330 // to ldrex/strex loops already.
1332 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1334
1335 // On v8, we have particularly efficient implementations of atomic fences
1336 // if they can be combined with nearby atomic loads and stores.
1337 if (!Subtarget->hasAcquireRelease() ||
1338 getTargetMachine().getOptLevel() == CodeGenOptLevel::None) {
1339 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1340 InsertFencesForAtomic = true;
1341 }
1342 } else {
1343 // If there's anything we can use as a barrier, go through custom lowering
1344 // for ATOMIC_FENCE.
1346 // If the target has DMB in Thumb, fences can be inserted.
1346 if (Subtarget->hasDataBarrier())
1347 InsertFencesForAtomic = true;
1348
1350 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1351
1352 // Set them all for libcall, which will force libcalls.
1365 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1366 // Unordered/Monotonic case.
1367 if (!InsertFencesForAtomic) {
1370 }
1371 }
1372
1373 // Compute supported atomic widths.
1374 if (Subtarget->isTargetLinux() ||
1375 (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1376 // For targets where __sync_* routines are reliably available, we use them
1377 // if necessary.
1378 //
1379 // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1380 // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1381 //
1382 // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
1383 // such targets should provide __sync_* routines, which use the ARM mode
1384 // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
1385 // encoding; see ARMISD::MEMBARRIER_MCR.)
1387 } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1388 Subtarget->hasForced32BitAtomics()) {
1389 // Cortex-M cores (besides Cortex-M0) have 32-bit atomics.
1391 } else {
1392 // We can't assume anything about other targets; just use libatomic
1393 // routines.
1395 }
1396
1398
1400
1401 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1402 if (!Subtarget->hasV6Ops()) {
1405 }
1407
1408 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1409 !Subtarget->isThumb1Only()) {
1410 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1411 // iff target supports vfp2.
1421 }
1422
1423 // We want to custom lower some of our intrinsics.
1428 if (Subtarget->useSjLjEH())
1429 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1430
1440 if (Subtarget->hasFullFP16()) {
1444 }
1445
1447
1450 if (Subtarget->hasFullFP16())
1454 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1455
1456 // We don't support sin/cos/fmod/copysign/pow
1465 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1466 !Subtarget->isThumb1Only()) {
1469 }
1472
1473 if (!Subtarget->hasVFP4Base()) {
1476 }
1477
1478 // Various VFP goodness
1479 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1480 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1481 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1484 }
1485
1486 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1487 if (!Subtarget->hasFP16()) {
1490 }
1491
1492 // Strict floating-point comparisons need custom lowering.
1499 }
1500
1501 // Use __sincos_stret if available.
1502 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1503 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1506 }
1507
1508 // FP-ARMv8 implements a lot of rounding-like FP operations.
1509 if (Subtarget->hasFPARMv8Base()) {
1518 if (Subtarget->hasNEON()) {
1523 }
1524
1525 if (Subtarget->hasFP64()) {
1534 }
1535 }
1536
1537 // FP16 operations often need to be promoted to call library functions.
1538 if (Subtarget->hasFullFP16()) {
1552
1554 }
1555
1556 if (Subtarget->hasNEON()) {
1557 // vmin and vmax aren't available in a scalar form, so we can use
1558 // a NEON instruction with an undef lane instead.
1567
1568 if (Subtarget->hasFullFP16()) {
1573
1578 }
1579 }
1580
1581 // We have target-specific dag combine patterns for the following nodes:
1582 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1585
1586 if (Subtarget->hasMVEIntegerOps())
1588
1589 if (Subtarget->hasV6Ops())
1591 if (Subtarget->isThumb1Only())
1593 // Attempt to lower smin/smax to ssat/usat
1594 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1595 Subtarget->isThumb2()) {
1597 }
1598
1600
1601 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1602 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1604 else
1606
1607 //// temporary - rewrite interface to use type
1610 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1612 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1614
1615 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1616 // are at least 4 bytes aligned.
1618
1619 // Prefer likely predicted branches to selects on out-of-order cores.
1620 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1621
1622 setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1624
1625 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1626
1627 if (Subtarget->isThumb() || Subtarget->isThumb2())
1629}
1630
1632 return Subtarget->useSoftFloat();
1633}
1634
1635// FIXME: It might make sense to define the representative register class as the
1636// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1637 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1638// SPR's representative would be DPR_VFP2. This should work well if register
1639// pressure tracking were modified such that a register use would increment the
1640 // pressure of the register class's representative and all of its super
1641// classes' representatives transitively. We have not implemented this because
1642// of the difficulty prior to coalescing of modeling operand register classes
1643// due to the common occurrence of cross class copies and subregister insertions
1644// and extractions.
1645std::pair<const TargetRegisterClass *, uint8_t>
1647 MVT VT) const {
1648 const TargetRegisterClass *RRC = nullptr;
1649 uint8_t Cost = 1;
1650 switch (VT.SimpleTy) {
1651 default:
1653 // Use DPR as representative register class for all floating point
1654 // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1655 // the cost is 1 for both f32 and f64.
1656 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1657 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1658 RRC = &ARM::DPRRegClass;
1659 // When NEON is used for SP, only half of the register file is available
1660 // because operations that define both SP and DP results will be constrained
1661 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1662 // coalescing by double-counting the SP regs. See the FIXME above.
1663 if (Subtarget->useNEONForSinglePrecisionFP())
1664 Cost = 2;
1665 break;
1666 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1667 case MVT::v4f32: case MVT::v2f64:
1668 RRC = &ARM::DPRRegClass;
1669 Cost = 2;
1670 break;
1671 case MVT::v4i64:
1672 RRC = &ARM::DPRRegClass;
1673 Cost = 4;
1674 break;
1675 case MVT::v8i64:
1676 RRC = &ARM::DPRRegClass;
1677 Cost = 8;
1678 break;
1679 }
1680 return std::make_pair(RRC, Cost);
1681}
1682
1683const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1684#define MAKE_CASE(V) \
1685 case V: \
1686 return #V;
1687 switch ((ARMISD::NodeType)Opcode) {
1689 break;
1893#undef MAKE_CASE
1894 }
1895 return nullptr;
1896}
1897
1899 EVT VT) const {
1900 if (!VT.isVector())
1901 return getPointerTy(DL);
1902
1903 // MVE has a predicate register.
1904 if ((Subtarget->hasMVEIntegerOps() &&
1905 (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
1906 VT == MVT::v16i8)) ||
1907 (Subtarget->hasMVEFloatOps() &&
1908 (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
1909 return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1911}
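
// Illustrative example: with MVE enabled, a setcc on v4i32 therefore produces
// a v4i1 predicate (the VCCR class registered above, living in VPR), which is
// what VPT/VPSEL consume, rather than a v4i32 all-ones/all-zeros mask as on
// NEON.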
1912
1913/// getRegClassFor - Return the register class that should be used for the
1914/// specified value type.
1915const TargetRegisterClass *
1916ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1917 (void)isDivergent;
1918 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1919 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1920 // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1921 // MVE Q registers.
1922 if (Subtarget->hasNEON()) {
1923 if (VT == MVT::v4i64)
1924 return &ARM::QQPRRegClass;
1925 if (VT == MVT::v8i64)
1926 return &ARM::QQQQPRRegClass;
1927 }
1928 if (Subtarget->hasMVEIntegerOps()) {
1929 if (VT == MVT::v4i64)
1930 return &ARM::MQQPRRegClass;
1931 if (VT == MVT::v8i64)
1932 return &ARM::MQQQQPRRegClass;
1933 }
1935}
1936
1937 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1938// source/dest is aligned and the copy size is large enough. We therefore want
1939// to align such objects passed to memory intrinsics.
1941 Align &PrefAlign) const {
1942 if (!isa<MemIntrinsic>(CI))
1943 return false;
1944 MinSize = 8;
1945 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1946 // cycle faster than 4-byte aligned LDM.
1947 PrefAlign =
1948 (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
1949 return true;
1950}
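
// Illustrative example: a memcpy of 64 bytes between locally created buffers
// benefits from this hint; raising the buffers' alignment to 8 lets the
// expanded copy take the faster 8-byte-aligned LDM/STM path described above.
// Copies smaller than MinSize (8 bytes) are unaffected.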
1951
1952// Create a fast isel object.
1953FastISel *
1955 const TargetLibraryInfo *libInfo) const {
1956 return ARM::createFastISel(funcInfo, libInfo);
1957}
1958
1960 unsigned NumVals = N->getNumValues();
1961 if (!NumVals)
1962 return Sched::RegPressure;
1963
1964 for (unsigned i = 0; i != NumVals; ++i) {
1965 EVT VT = N->getValueType(i);
1966 if (VT == MVT::Glue || VT == MVT::Other)
1967 continue;
1968 if (VT.isFloatingPoint() || VT.isVector())
1969 return Sched::ILP;
1970 }
1971
1972 if (!N->isMachineOpcode())
1973 return Sched::RegPressure;
1974
1975 // Loads are scheduled for latency even if the instruction itinerary
1976 // is not available.
1977 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1978 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1979
1980 if (MCID.getNumDefs() == 0)
1981 return Sched::RegPressure;
1982 if (!Itins->isEmpty() &&
1983 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2U)
1984 return Sched::ILP;
1985
1986 return Sched::RegPressure;
1987}
1988
1989//===----------------------------------------------------------------------===//
1990// Lowering Code
1991//===----------------------------------------------------------------------===//
1992
1993static bool isSRL16(const SDValue &Op) {
1994 if (Op.getOpcode() != ISD::SRL)
1995 return false;
1996 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1997 return Const->getZExtValue() == 16;
1998 return false;
1999}
2000
2001static bool isSRA16(const SDValue &Op) {
2002 if (Op.getOpcode() != ISD::SRA)
2003 return false;
2004 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2005 return Const->getZExtValue() == 16;
2006 return false;
2007}
2008
2009static bool isSHL16(const SDValue &Op) {
2010 if (Op.getOpcode() != ISD::SHL)
2011 return false;
2012 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2013 return Const->getZExtValue() == 16;
2014 return false;
2015}
2016
2017// Check for a signed 16-bit value. We special case SRA because it makes it
2018 // simpler when also looking for SRAs that aren't sign extending a
2019// smaller value. Without the check, we'd need to take extra care with
2020// checking order for some operations.
2021static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
2022 if (isSRA16(Op))
2023 return isSHL16(Op.getOperand(0));
2024 return DAG.ComputeNumSignBits(Op) == 17;
2025}
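
// Illustrative example: on a 32-bit value, 17 sign bits means bits 31..15 are
// all copies of the sign bit, so the value is representable as a signed
// 16-bit quantity. 0xFFFF8000 (-32768) has exactly 17 leading sign bits,
// while 0xFFFF7FFF (-32769) has only 16 and does not qualify.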
2026
2027/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
2029 switch (CC) {
2030 default: llvm_unreachable("Unknown condition code!");
2031 case ISD::SETNE: return ARMCC::NE;
2032 case ISD::SETEQ: return ARMCC::EQ;
2033 case ISD::SETGT: return ARMCC::GT;
2034 case ISD::SETGE: return ARMCC::GE;
2035 case ISD::SETLT: return ARMCC::LT;
2036 case ISD::SETLE: return ARMCC::LE;
2037 case ISD::SETUGT: return ARMCC::HI;
2038 case ISD::SETUGE: return ARMCC::HS;
2039 case ISD::SETULT: return ARMCC::LO;
2040 case ISD::SETULE: return ARMCC::LS;
2041 }
2042}
2043
2044/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
2046 ARMCC::CondCodes &CondCode2) {
2047 CondCode2 = ARMCC::AL;
2048 switch (CC) {
2049 default: llvm_unreachable("Unknown FP condition!");
2050 case ISD::SETEQ:
2051 case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
2052 case ISD::SETGT:
2053 case ISD::SETOGT: CondCode = ARMCC::GT; break;
2054 case ISD::SETGE:
2055 case ISD::SETOGE: CondCode = ARMCC::GE; break;
2056 case ISD::SETOLT: CondCode = ARMCC::MI; break;
2057 case ISD::SETOLE: CondCode = ARMCC::LS; break;
2058 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
2059 case ISD::SETO: CondCode = ARMCC::VC; break;
2060 case ISD::SETUO: CondCode = ARMCC::VS; break;
2061 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
2062 case ISD::SETUGT: CondCode = ARMCC::HI; break;
2063 case ISD::SETUGE: CondCode = ARMCC::PL; break;
2064 case ISD::SETLT:
2065 case ISD::SETULT: CondCode = ARMCC::LT; break;
2066 case ISD::SETLE:
2067 case ISD::SETULE: CondCode = ARMCC::LE; break;
2068 case ISD::SETNE:
2069 case ISD::SETUNE: CondCode = ARMCC::NE; break;
2070 }
2071}
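
// Illustrative example: conditions with no single ARM encoding come back as a
// pair. SETONE (ordered and unequal) is reported as MI with CondCode2 = GT,
// meaning the caller emits the branch or select twice, once for "less than"
// and once for "greater than".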
2072
2073//===----------------------------------------------------------------------===//
2074// Calling Convention Implementation
2075//===----------------------------------------------------------------------===//
2076
2077/// getEffectiveCallingConv - Get the effective calling convention, taking into
2078/// account presence of floating point hardware and calling convention
2079/// limitations, such as support for variadic functions.
2081ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
2082 bool isVarArg) const {
2083 switch (CC) {
2084 default:
2085 report_fatal_error("Unsupported calling convention");
2088 case CallingConv::GHC:
2090 return CC;
2096 case CallingConv::Swift:
2099 case CallingConv::C:
2100 case CallingConv::Tail:
2101 if (!Subtarget->isAAPCS_ABI())
2102 return CallingConv::ARM_APCS;
2103 else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
2104 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2105 !isVarArg)
2107 else
2109 case CallingConv::Fast:
2111 if (!Subtarget->isAAPCS_ABI()) {
2112 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2113 return CallingConv::Fast;
2114 return CallingConv::ARM_APCS;
2115 } else if (Subtarget->hasVFP2Base() &&
2116 !Subtarget->isThumb1Only() && !isVarArg)
2118 else
2120 }
2121}
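
// Illustrative example: a plain C function on a hard-float AAPCS target is
// treated as ARM_AAPCS_VFP here, but the same function declared variadic
// falls back to ARM_AAPCS, since variadic arguments never use the VFP
// registers.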
2122
2124 bool isVarArg) const {
2125 return CCAssignFnForNode(CC, false, isVarArg);
2126}
2127
2129 bool isVarArg) const {
2130 return CCAssignFnForNode(CC, true, isVarArg);
2131}
2132
2133/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2134/// CallingConvention.
2135CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2136 bool Return,
2137 bool isVarArg) const {
2138 switch (getEffectiveCallingConv(CC, isVarArg)) {
2139 default:
2140 report_fatal_error("Unsupported calling convention");
2142 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2144 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2146 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2147 case CallingConv::Fast:
2148 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2149 case CallingConv::GHC:
2150 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2152 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2154 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2156 return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2157 }
2158}
2159
2160SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2161 MVT LocVT, MVT ValVT, SDValue Val) const {
2162 Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2163 Val);
2164 if (Subtarget->hasFullFP16()) {
2165 Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2166 } else {
2167 Val = DAG.getNode(ISD::TRUNCATE, dl,
2168 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2169 Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2170 }
2171 return Val;
2172}
2173
2174SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2175 MVT LocVT, MVT ValVT,
2176 SDValue Val) const {
2177 if (Subtarget->hasFullFP16()) {
2178 Val = DAG.getNode(ARMISD::VMOVrh, dl,
2179 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2180 } else {
2181 Val = DAG.getNode(ISD::BITCAST, dl,
2182 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2183 Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2184 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2185 }
2186 return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2187}
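
// Illustrative note: these two helpers implement the ABI rule that f16 values
// travel in a full 32-bit location (i32 for the soft ABI, f32 for the hard
// ABI) with the half value in the low 16 bits. With +fullfp16 the move is a
// single VMOVhr/VMOVrh; otherwise it is modelled with bitcasts plus an
// integer truncate or zero-extend, as shown above.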
2188
2189/// LowerCallResult - Lower the result values of a call into the
2190/// appropriate copies out of appropriate physical registers.
2191SDValue ARMTargetLowering::LowerCallResult(
2192 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
2193 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2194 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2195 SDValue ThisVal) const {
2196 // Assign locations to each value returned by this call.
2198 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2199 *DAG.getContext());
2200 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2201
2202 // Copy all of the result registers out of their specified physreg.
2203 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2204 CCValAssign VA = RVLocs[i];
2205
2206 // Pass 'this' value directly from the argument to return value, to avoid
2207 // reg unit interference
2208 if (i == 0 && isThisReturn) {
2209 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2210 "unexpected return calling convention register assignment");
2211 InVals.push_back(ThisVal);
2212 continue;
2213 }
2214
2215 SDValue Val;
2216 if (VA.needsCustom() &&
2217 (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2218 // Handle f64 or half of a v2f64.
2219 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2220 InGlue);
2221 Chain = Lo.getValue(1);
2222 InGlue = Lo.getValue(2);
2223 VA = RVLocs[++i]; // skip ahead to next loc
2224 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2225 InGlue);
2226 Chain = Hi.getValue(1);
2227 InGlue = Hi.getValue(2);
2228 if (!Subtarget->isLittle())
2229 std::swap (Lo, Hi);
2230 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2231
2232 if (VA.getLocVT() == MVT::v2f64) {
2233 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2234 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2235 DAG.getConstant(0, dl, MVT::i32));
2236
2237 VA = RVLocs[++i]; // skip ahead to next loc
2238 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2239 Chain = Lo.getValue(1);
2240 InGlue = Lo.getValue(2);
2241 VA = RVLocs[++i]; // skip ahead to next loc
2242 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2243 Chain = Hi.getValue(1);
2244 InGlue = Hi.getValue(2);
2245 if (!Subtarget->isLittle())
2246 std::swap (Lo, Hi);
2247 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2248 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2249 DAG.getConstant(1, dl, MVT::i32));
2250 }
2251 } else {
2252 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2253 InGlue);
2254 Chain = Val.getValue(1);
2255 InGlue = Val.getValue(2);
2256 }
2257
2258 switch (VA.getLocInfo()) {
2259 default: llvm_unreachable("Unknown loc info!");
2260 case CCValAssign::Full: break;
2261 case CCValAssign::BCvt:
2262 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2263 break;
2264 }
2265
2266 // f16 arguments have their size extended to 4 bytes and passed as if they
2267 // had been copied to the LSBs of a 32-bit register.
2268 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2269 if (VA.needsCustom() &&
2270 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2271 Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2272
2273 InVals.push_back(Val);
2274 }
2275
2276 return Chain;
2277}
2278
2279std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2280 const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
2281 bool IsTailCall, int SPDiff) const {
2282 SDValue DstAddr;
2283 MachinePointerInfo DstInfo;
2284 int32_t Offset = VA.getLocMemOffset();
2286
2287 if (IsTailCall) {
2288 Offset += SPDiff;
2289 auto PtrVT = getPointerTy(DAG.getDataLayout());
2290 int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2291 int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2292 DstAddr = DAG.getFrameIndex(FI, PtrVT);
2293 DstInfo =
2295 } else {
2296 SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
2297 DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2298 StackPtr, PtrOff);
2299 DstInfo =
2301 }
2302
2303 return std::make_pair(DstAddr, DstInfo);
2304}
2305
2306void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2307 SDValue Chain, SDValue &Arg,
2308 RegsToPassVector &RegsToPass,
2309 CCValAssign &VA, CCValAssign &NextVA,
2310 SDValue &StackPtr,
2311 SmallVectorImpl<SDValue> &MemOpChains,
2312 bool IsTailCall,
2313 int SPDiff) const {
2314 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2315 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2316 unsigned id = Subtarget->isLittle() ? 0 : 1;
2317 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2318
2319 if (NextVA.isRegLoc())
2320 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2321 else {
2322 assert(NextVA.isMemLoc());
2323 if (!StackPtr.getNode())
2324 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2326
2327 SDValue DstAddr;
2328 MachinePointerInfo DstInfo;
2329 std::tie(DstAddr, DstInfo) =
2330 computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2331 MemOpChains.push_back(
2332 DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2333 }
2334}
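
// Illustrative example: under the soft-float AAPCS an f64 argument is split
// by VMOVRRD into two i32 halves; the first half goes in the register chosen
// by the calling convention (say r0) and the second either in the partner
// register (r1) or, if registers have run out, in the stack slot computed by
// computeAddrForCallArg above.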
2335
2336static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2337 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2339}
2340
2341/// LowerCall - Lowering a call into a callseq_start <-
2342 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
2343/// nodes.
2344SDValue
2345ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2346 SmallVectorImpl<SDValue> &InVals) const {
2347 SelectionDAG &DAG = CLI.DAG;
2348 SDLoc &dl = CLI.DL;
2350 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2352 SDValue Chain = CLI.Chain;
2353 SDValue Callee = CLI.Callee;
2354 bool &isTailCall = CLI.IsTailCall;
2355 CallingConv::ID CallConv = CLI.CallConv;
2356 bool doesNotRet = CLI.DoesNotReturn;
2357 bool isVarArg = CLI.IsVarArg;
2358
2362 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2363 bool isThisReturn = false;
2364 bool isCmseNSCall = false;
2365 bool isSibCall = false;
2366 bool PreferIndirect = false;
2367 bool GuardWithBTI = false;
2368
2369 // Analyze operands of the call, assigning locations to each operand.
2370 SmallVector<CCValAssign, 16> ArgLocs;
2371 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2372 *DAG.getContext());
2373 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2374
2375 // Lower 'returns_twice' calls to a pseudo-instruction.
2376 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2377 !Subtarget->noBTIAtReturnTwice())
2378 GuardWithBTI = AFI->branchTargetEnforcement();
2379
2380 // Determine whether this is a non-secure function call.
2381 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2382 isCmseNSCall = true;
2383
2384 // Disable tail calls if they're not supported.
2385 if (!Subtarget->supportsTailCall())
2386 isTailCall = false;
2387
2388 // For both non-secure calls and returns from a CMSE entry function, the
2389 // function needs to do some extra work after the call, or before the
2390 // return, respectively; thus it cannot end with a tail call.
2391 if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2392 isTailCall = false;
2393
2394 if (isa<GlobalAddressSDNode>(Callee)) {
2395 // If we're optimizing for minimum size and the function is called three or
2396 // more times in this block, we can improve codesize by calling indirectly
2397 // as BLXr has a 16-bit encoding.
2398 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2399 if (CLI.CB) {
2400 auto *BB = CLI.CB->getParent();
2401 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2402 count_if(GV->users(), [&BB](const User *U) {
2403 return isa<Instruction>(U) &&
2404 cast<Instruction>(U)->getParent() == BB;
2405 }) > 2;
2406 }
2407 }
2408 if (isTailCall) {
2409 // Check if it's really possible to do a tail call.
2410 isTailCall =
2411 IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, PreferIndirect);
2412
2413 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2414 CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2415 isSibCall = true;
2416
2417 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2418 // detected sibcalls.
2419 if (isTailCall)
2420 ++NumTailCalls;
2421 }
2422
2423 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2424 report_fatal_error("failed to perform tail call elimination on a call "
2425 "site marked musttail");
2426
2427 // Get a count of how many bytes are to be pushed on the stack.
2428 unsigned NumBytes = CCInfo.getStackSize();
2429
2430 // SPDiff is the byte offset of the call's argument area from the callee's.
2431 // Stores to callee stack arguments will be placed in FixedStackSlots offset
2432 // by this amount for a tail call. In a sibling call it must be 0 because the
2433 // caller will deallocate the entire stack and the callee still expects its
2434 // arguments to begin at SP+0. Completely unused for non-tail calls.
2435 int SPDiff = 0;
2436
2437 if (isTailCall && !isSibCall) {
2438 auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2439 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2440
2441 // Since callee will pop argument stack as a tail call, we must keep the
2442 // popped size 16-byte aligned.
2443 Align StackAlign = DAG.getDataLayout().getStackAlignment();
2444 NumBytes = alignTo(NumBytes, StackAlign);
2445
2446 // SPDiff will be negative if this tail call requires more space than we
2447 // would automatically have in our incoming argument space. Positive if we
2448 // can actually shrink the stack.
2449 SPDiff = NumReusableBytes - NumBytes;
2450
2451 // If this call requires more stack than we have available from
2452 // LowerFormalArguments, tell FrameLowering to reserve space for it.
2453 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2454 AFI->setArgRegsSaveSize(-SPDiff);
2455 }
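// Worked example (illustrative only): if the caller was entered with 16 bytes
// of stack arguments (NumReusableBytes = 16) and this tail call needs 24
// bytes, NumBytes is first aligned up to 32, so SPDiff = 16 - 32 = -16 and
// FrameLowering is asked to reserve those extra 16 bytes so the callee's
// argument area can extend past the caller's incoming one.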
2456
2457 if (isSibCall) {
2458 // For sibling tail calls, memory operands are available in our caller's stack.
2459 NumBytes = 0;
2460 } else {
2461 // Adjust the stack pointer for the new arguments...
2462 // These operations are automatically eliminated by the prolog/epilog pass
2463 Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2464 }
2465
2466 SDValue StackPtr =
2467 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2468
2469 RegsToPassVector RegsToPass;
2470 SmallVector<SDValue, 8> MemOpChains;
2471
2472 // During a tail call, stores to the argument area must happen after all of
2473 // the function's incoming arguments have been loaded because they may alias.
2474 // This is done by folding in a TokenFactor from LowerFormalArguments, but
2475 // there's no point in doing so repeatedly so this tracks whether that's
2476 // happened yet.
2477 bool AfterFormalArgLoads = false;
2478
2479 // Walk the register/memloc assignments, inserting copies/loads. In the case
2480 // of tail call optimization, arguments are handled later.
2481 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2482 i != e;
2483 ++i, ++realArgIdx) {
2484 CCValAssign &VA = ArgLocs[i];
2485 SDValue Arg = OutVals[realArgIdx];
2486 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2487 bool isByVal = Flags.isByVal();
2488
2489 // Promote the value if needed.
2490 switch (VA.getLocInfo()) {
2491 default: llvm_unreachable("Unknown loc info!");
2492 case CCValAssign::Full: break;
2493 case CCValAssign::SExt:
2494 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2495 break;
2496 case CCValAssign::ZExt:
2497 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2498 break;
2499 case CCValAssign::AExt:
2500 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2501 break;
2502 case CCValAssign::BCvt:
2503 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2504 break;
2505 }
2506
2507 if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2508 Chain = DAG.getStackArgumentTokenFactor(Chain);
2509 AfterFormalArgLoads = true;
2510 }
2511
2512 // f16 arguments have their size extended to 4 bytes and passed as if they
2513 // had been copied to the LSBs of a 32-bit register.
2514 // To do this, they are passed extended to i32 (soft ABI) or to f32 (hard ABI).
2515 if (VA.needsCustom() &&
2516 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2517 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2518 } else {
2519 // f16 arguments could have been extended prior to argument lowering.
2520 // Mask these arguments if this is a CMSE nonsecure call.
2521 auto ArgVT = Outs[realArgIdx].ArgVT;
2522 if (isCmseNSCall && (ArgVT == MVT::f16)) {
2523 auto LocBits = VA.getLocVT().getSizeInBits();
2524 auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2525 SDValue Mask =
2526 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2527 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2528 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2529 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2530 }
2531 }
2532
2533 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2534 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2535 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2536 DAG.getConstant(0, dl, MVT::i32));
2537 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2538 DAG.getConstant(1, dl, MVT::i32));
2539
2540 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2541 StackPtr, MemOpChains, isTailCall, SPDiff);
2542
2543 VA = ArgLocs[++i]; // skip ahead to next loc
2544 if (VA.isRegLoc()) {
2545 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2546 StackPtr, MemOpChains, isTailCall, SPDiff);
2547 } else {
2548 assert(VA.isMemLoc());
2549 SDValue DstAddr;
2550 MachinePointerInfo DstInfo;
2551 std::tie(DstAddr, DstInfo) =
2552 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2553 MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2554 }
2555 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2556 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2557 StackPtr, MemOpChains, isTailCall, SPDiff);
2558 } else if (VA.isRegLoc()) {
2559 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2560 Outs[0].VT == MVT::i32) {
2561 assert(VA.getLocVT() == MVT::i32 &&
2562 "unexpected calling convention register assignment");
2563 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2564 "unexpected use of 'returned'");
2565 isThisReturn = true;
2566 }
2567 const TargetOptions &Options = DAG.getTarget().Options;
2568 if (Options.EmitCallSiteInfo)
2569 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
2570 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2571 } else if (isByVal) {
2572 assert(VA.isMemLoc());
2573 unsigned offset = 0;
2574
2575 // True if this byval aggregate will be split between registers
2576 // and memory.
2577 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2578 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2579
2580 if (CurByValIdx < ByValArgsCount) {
2581
2582 unsigned RegBegin, RegEnd;
2583 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2584
2585 EVT PtrVT =
2586 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2587 unsigned int i, j;
2588 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2589 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2590 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2591 SDValue Load =
2592 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2593 DAG.InferPtrAlign(AddArg));
2594 MemOpChains.push_back(Load.getValue(1));
2595 RegsToPass.push_back(std::make_pair(j, Load));
2596 }
2597
2598 // If the parameter size exceeds the register area, the "offset" value
2599 // helps us calculate the stack slot for the remaining part properly.
2600 offset = RegEnd - RegBegin;
2601
2602 CCInfo.nextInRegsParam();
2603 }
2604
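// Illustrative example (not in the original source): a 20-byte byval whose
// first two words were loaded into r2 and r3 above ends up with offset == 2,
// so the remaining 20 - 4*2 = 12 bytes are copied into the argument's stack
// slot by the ARMISD::COPY_STRUCT_BYVAL node created below.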
2605 if (Flags.getByValSize() > 4*offset) {
2606 auto PtrVT = getPointerTy(DAG.getDataLayout());
2607 SDValue Dst;
2608 MachinePointerInfo DstInfo;
2609 std::tie(Dst, DstInfo) =
2610 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2611 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2612 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2613 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2614 MVT::i32);
2615 SDValue AlignNode =
2616 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2617
2618 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2619 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2620 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2621 Ops));
2622 }
2623 } else {
2624 assert(VA.isMemLoc());
2625 SDValue DstAddr;
2626 MachinePointerInfo DstInfo;
2627 std::tie(DstAddr, DstInfo) =
2628 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2629
2630 SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2631 MemOpChains.push_back(Store);
2632 }
2633 }
2634
2635 if (!MemOpChains.empty())
2636 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2637
2638 // Build a sequence of copy-to-reg nodes chained together with token chain
2639 // and flag operands which copy the outgoing args into the appropriate regs.
2640 SDValue InGlue;
2641 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2642 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2643 RegsToPass[i].second, InGlue);
2644 InGlue = Chain.getValue(1);
2645 }
2646
2647 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2648 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2649 // node so that legalize doesn't hack it.
2650 bool isDirect = false;
2651
2653 const GlobalValue *GVal = nullptr;
2654 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2655 GVal = G->getGlobal();
2656 bool isStub = !TM.shouldAssumeDSOLocal(GVal) && Subtarget->isTargetMachO();
2657
2658 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2659 bool isLocalARMFunc = false;
2660 auto PtrVt = getPointerTy(DAG.getDataLayout());
2661
2662 if (Subtarget->genLongCalls()) {
2663 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2664 "long-calls codegen is not position independent!");
2665 // Handle a global address or an external symbol. If it's not one of
2666 // those, the target's already in a register, so we don't need to do
2667 // anything extra.
2668 if (isa<GlobalAddressSDNode>(Callee)) {
2669 if (Subtarget->genExecuteOnly()) {
2670 if (Subtarget->useMovt())
2671 ++NumMovwMovt;
2672 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2673 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2674 } else {
2675 // Create a constant pool entry for the callee address
2676 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2677 ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
2678 GVal, ARMPCLabelIndex, ARMCP::CPValue, 0);
2679
2680 // Get the address of the callee into a register
2681 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2682 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2683 Callee = DAG.getLoad(
2684 PtrVt, dl, DAG.getEntryNode(), Addr,
2685 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2686 }
2687 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2688 const char *Sym = S->getSymbol();
2689
2690 if (Subtarget->genExecuteOnly()) {
2691 if (Subtarget->useMovt())
2692 ++NumMovwMovt;
2693 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2694 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2695 } else {
2696 // Create a constant pool entry for the callee address
2697 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2698 ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
2699 *DAG.getContext(), Sym, ARMPCLabelIndex, 0);
2700
2701 // Get the address of the callee into a register
2702 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2703 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2704 Callee = DAG.getLoad(
2705 PtrVt, dl, DAG.getEntryNode(), Addr,
2706 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2707 }
2708 }
2709 } else if (isa<GlobalAddressSDNode>(Callee)) {
2710 if (!PreferIndirect) {
2711 isDirect = true;
2712 bool isDef = GVal->isStrongDefinitionForLinker();
2713
2714 // ARM call to a local ARM function is predicable.
2715 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2716 // tBX takes a register source operand.
2717 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2718 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2719 Callee = DAG.getNode(
2720 ARMISD::WrapperPIC, dl, PtrVt,
2721 DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2722 Callee = DAG.getLoad(
2723 PtrVt, dl, DAG.getEntryNode(), Callee,
2727 } else if (Subtarget->isTargetCOFF()) {
2728 assert(Subtarget->isTargetWindows() &&
2729 "Windows is the only supported COFF target");
2730 unsigned TargetFlags = ARMII::MO_NO_FLAG;
2731 if (GVal->hasDLLImportStorageClass())
2732 TargetFlags = ARMII::MO_DLLIMPORT;
2733 else if (!TM.shouldAssumeDSOLocal(GVal))
2734 TargetFlags = ARMII::MO_COFFSTUB;
2735 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0,
2736 TargetFlags);
2737 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2738 Callee =
2739 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2740 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2741 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2742 } else {
2743 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0);
2744 }
2745 }
2746 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2747 isDirect = true;
2748 // tBX takes a register source operand.
2749 const char *Sym = S->getSymbol();
2750 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2751 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2752 ARMConstantPoolValue *CPV =
2753 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2754 ARMPCLabelIndex, 4);
2755 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2756 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2757 Callee = DAG.getLoad(
2758 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2759 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2760 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2761 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2762 } else {
2763 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2764 }
2765 }
2766
2767 if (isCmseNSCall) {
2768 assert(!isARMFunc && !isDirect &&
2769 "Cannot handle call to ARM function or direct call");
2770 if (NumBytes > 0) {
2772 "call to non-secure function would "
2773 "require passing arguments on stack",
2774 dl.getDebugLoc());
2775 DAG.getContext()->diagnose(Diag);
2776 }
2777 if (isStructRet) {
2780 "call to non-secure function would return value through pointer",
2781 dl.getDebugLoc());
2782 DAG.getContext()->diagnose(Diag);
2783 }
2784 }
2785
2786 // FIXME: handle tail calls differently.
2787 unsigned CallOpc;
2788 if (Subtarget->isThumb()) {
2789 if (GuardWithBTI)
2790 CallOpc = ARMISD::t2CALL_BTI;
2791 else if (isCmseNSCall)
2792 CallOpc = ARMISD::tSECALL;
2793 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2794 CallOpc = ARMISD::CALL_NOLINK;
2795 else
2796 CallOpc = ARMISD::CALL;
2797 } else {
2798 if (!isDirect && !Subtarget->hasV5TOps())
2799 CallOpc = ARMISD::CALL_NOLINK;
2800 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2801 // Emit regular call when code size is the priority
2802 !Subtarget->hasMinSize())
2803 // "mov lr, pc; b _foo" to avoid confusing the RSP
2804 CallOpc = ARMISD::CALL_NOLINK;
2805 else
2806 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2807 }
2808
2809 // We don't usually want to end the call-sequence here because we would tidy
2810 // the frame up *after* the call, however in the ABI-changing tail-call case
2811 // we've carefully laid out the parameters so that when sp is reset they'll be
2812 // in the correct location.
2813 if (isTailCall && !isSibCall) {
2814 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, dl);
2815 InGlue = Chain.getValue(1);
2816 }
2817
2818 std::vector<SDValue> Ops;
2819 Ops.push_back(Chain);
2820 Ops.push_back(Callee);
2821
2822 if (isTailCall) {
2823 Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32));
2824 }
2825
2826 // Add argument registers to the end of the list so that they are known live
2827 // into the call.
2828 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2829 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2830 RegsToPass[i].second.getValueType()));
2831
2832 // Add a register mask operand representing the call-preserved registers.
2833 const uint32_t *Mask;
2834 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2835 if (isThisReturn) {
2836 // For 'this' returns, use the R0-preserving mask if applicable
2837 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2838 if (!Mask) {
2839 // Set isThisReturn to false if the calling convention is not one that
2840 // allows 'returned' to be modeled in this way, so LowerCallResult does
2841 // not try to pass 'this' straight through
2842 isThisReturn = false;
2843 Mask = ARI->getCallPreservedMask(MF, CallConv);
2844 }
2845 } else
2846 Mask = ARI->getCallPreservedMask(MF, CallConv);
2847
2848 assert(Mask && "Missing call preserved mask for calling convention");
2849 Ops.push_back(DAG.getRegisterMask(Mask));
2850
2851 if (InGlue.getNode())
2852 Ops.push_back(InGlue);
2853
2854 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2855 if (isTailCall) {
2856 MF.getFrameInfo().setHasTailCall();
2857 SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2858 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2859 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2860 return Ret;
2861 }
2862
2863 // Returns a chain and a flag for retval copy to use.
2864 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2865 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2866 InGlue = Chain.getValue(1);
2867 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2868
2869 // If we're guaranteeing tail-calls will be honoured, the callee must
2870 // pop its own argument stack on return. But this call is *not* a tail call so
2871 // we need to undo that after it returns to restore the status-quo.
2872 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
2873 uint64_t CalleePopBytes =
2874 canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;
2875
2876 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);
2877 if (!Ins.empty())
2878 InGlue = Chain.getValue(1);
2879
2880 // Handle result values, copying them out of physregs into vregs that we
2881 // return.
2882 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2883 InVals, isThisReturn,
2884 isThisReturn ? OutVals[0] : SDValue());
2885}
2886
2887/// HandleByVal - Every parameter *after* a byval parameter is passed
2888/// on the stack. Remember the next parameter register to allocate,
2889 /// and then confiscate the rest of the parameter registers to ensure
2890 /// this.
2891void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2892 Align Alignment) const {
2893 // Byval (as with any stack) slots are always at least 4 byte aligned.
2894 Alignment = std::max(Alignment, Align(4));
2895
2896 unsigned Reg = State->AllocateReg(GPRArgRegs);
2897 if (!Reg)
2898 return;
2899
2900 unsigned AlignInRegs = Alignment.value() / 4;
2901 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2902 for (unsigned i = 0; i < Waste; ++i)
2903 Reg = State->AllocateReg(GPRArgRegs);
2904
2905 if (!Reg)
2906 return;
2907
2908 unsigned Excess = 4 * (ARM::R4 - Reg);
2909
2910 // Special case when NSAA != SP and the parameter size is greater than the
2911 // size of all remaining GPR registers. In that case we can't split the
2912 // parameter; we must send it entirely to the stack. We also must set the
2913 // NCRN to R4, wasting all remaining registers.
2914 const unsigned NSAAOffset = State->getStackSize();
2915 if (NSAAOffset != 0 && Size > Excess) {
2916 while (State->AllocateReg(GPRArgRegs))
2917 ;
2918 return;
2919 }
2920
2921 // The first register for the byval parameter is the first register that
2922 // wasn't allocated before this method call, i.e. "reg". If the parameter
2923 // is small enough to fit in the range [reg, r4), then the end (one past
2924 // the last) register is reg + param-size-in-regs; otherwise the parameter
2925 // is split between registers and the stack, and the end register is r4 in
2926 // this case.
2927 unsigned ByValRegBegin = Reg;
2928 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2929 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2930 // Note: the first register was already allocated at the beginning of this
2931 // function, so allocate the remaining registers we need.
2932 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2933 State->AllocateReg(GPRArgRegs);
2934 // A byval parameter that is split between registers and memory needs its
2935 // size truncated here.
2936 // In the case where the entire structure fits in registers, we set the
2937 // size in memory to zero.
2938 Size = std::max<int>(Size - Excess, 0);
2939}
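// Example of the allocation above (illustrative, assuming no stack arguments
// have been used yet): for a 16-byte byval with 8-byte alignment when r0 is
// already taken, AllocateReg first returns r1, one register is wasted to
// reach the 8-byte boundary at r2, the byval then occupies r2-r3, and Size is
// reduced from 16 to 8 so the remaining 8 bytes are passed on the stack.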
2940
2941/// MatchingStackOffset - Return true if the given stack call argument is
2942/// already available in the same position (relatively) of the caller's
2943/// incoming argument stack.
2944static
2945bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2946 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2947 const TargetInstrInfo *TII) {
2948 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2949 int FI = std::numeric_limits<int>::max();
2950 if (Arg.getOpcode() == ISD::CopyFromReg) {
2951 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2952 if (!VR.isVirtual())
2953 return false;
2954 MachineInstr *Def = MRI->getVRegDef(VR);
2955 if (!Def)
2956 return false;
2957 if (!Flags.isByVal()) {
2958 if (!TII->isLoadFromStackSlot(*Def, FI))
2959 return false;
2960 } else {
2961 return false;
2962 }
2963 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2964 if (Flags.isByVal())
2965 // ByVal argument is passed in as a pointer but it's now being
2966 // dereferenced. e.g.
2967 // define @foo(%struct.X* %A) {
2968 // tail call @bar(%struct.X* byval %A)
2969 // }
2970 return false;
2971 SDValue Ptr = Ld->getBasePtr();
2972 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2973 if (!FINode)
2974 return false;
2975 FI = FINode->getIndex();
2976 } else
2977 return false;
2978
2979 assert(FI != std::numeric_limits<int>::max());
2980 if (!MFI.isFixedObjectIndex(FI))
2981 return false;
2982 return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2983}
2984
2985/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2986/// for tail call optimization. Targets which want to do tail call
2987/// optimization should implement this function. Note that this function also
2988/// processes musttail calls, so when this function returns false on a valid
2989/// musttail call, a fatal backend error occurs.
2990bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2991 TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
2992 SmallVectorImpl<CCValAssign> &ArgLocs, const bool isIndirect) const {
2993 CallingConv::ID CalleeCC = CLI.CallConv;
2994 SDValue Callee = CLI.Callee;
2995 bool isVarArg = CLI.IsVarArg;
2996 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2997 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2998 const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2999 const SelectionDAG &DAG = CLI.DAG;
3000 MachineFunction &MF = DAG.getMachineFunction();
3001 const Function &CallerF = MF.getFunction();
3002 CallingConv::ID CallerCC = CallerF.getCallingConv();
3003
3004 assert(Subtarget->supportsTailCall());
3005
3006 // Indirect tail calls cannot be optimized for Thumb1 if the args
3007 // to the call take up r0-r3. The reason is that there are no legal registers
3008 // left to hold the pointer to the function to be called.
3009 // Similarly, if the function uses return address sign and authentication,
3010 // r12 is needed to hold the PAC and is not available to hold the callee
3011 // address.
3012 if (Outs.size() >= 4 &&
3013 (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
3014 if (Subtarget->isThumb1Only())
3015 return false;
3016 // Conservatively assume the function spills LR.
3018 return false;
3019 }
3020
3021 // Look for obvious safe cases to perform tail call optimization that do not
3022 // require ABI changes. This is what gcc calls sibcall.
3023
3024 // Exception-handling functions need a special set of instructions to indicate
3025 // a return to the hardware. Tail-calling another function would probably
3026 // break this.
3027 if (CallerF.hasFnAttribute("interrupt"))
3028 return false;
3029
3030 if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
3031 return CalleeCC == CallerCC;
3032
3033 // Also avoid sibcall optimization if either caller or callee uses struct
3034 // return semantics.
3035 bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
3036 bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
3037 if (isCalleeStructRet || isCallerStructRet)
3038 return false;
3039
3040 // Externally-defined functions with weak linkage should not be
3041 // tail-called on ARM when the OS does not support dynamic
3042 // pre-emption of symbols, as the AAELF spec requires normal calls
3043 // to undefined weak functions to be replaced with a NOP or jump to the
3044 // next instruction. The behaviour of branch instructions in this
3045 // situation (as used for tail calls) is implementation-defined, so we
3046 // cannot rely on the linker replacing the tail call with a return.
3047 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3048 const GlobalValue *GV = G->getGlobal();
3049 const Triple &TT = getTargetMachine().getTargetTriple();
3050 if (GV->hasExternalWeakLinkage() &&
3051 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3052 return false;
3053 }
3054
3055 // Check that the call results are passed in the same way.
3056 LLVMContext &C = *DAG.getContext();
3057 if (!CCState::resultsCompatible(
3058 getEffectiveCallingConv(CalleeCC, isVarArg),
3059 getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
3060 CCAssignFnForReturn(CalleeCC, isVarArg),
3061 CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
3062 return false;
3063 // The callee has to preserve all registers the caller needs to preserve.
3064 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3065 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3066 if (CalleeCC != CallerCC) {
3067 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3068 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3069 return false;
3070 }
3071
3072 // If Caller's vararg or byval argument has been split between registers and
3073 // stack, do not perform tail call, since part of the argument is in caller's
3074 // local frame.
3075 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3076 if (AFI_Caller->getArgRegsSaveSize())
3077 return false;
3078
3079 // If the callee takes no arguments then go on to check the results of the
3080 // call.
3081 if (!Outs.empty()) {
3082 if (CCInfo.getStackSize()) {
3083 // Check if the arguments are already laid out in the right way as
3084 // the caller's fixed stack objects.
3085 MachineFrameInfo &MFI = MF.getFrameInfo();
3086 const MachineRegisterInfo *MRI = &MF.getRegInfo();
3087 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3088 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
3089 i != e;
3090 ++i, ++realArgIdx) {
3091 CCValAssign &VA = ArgLocs[i];
3092 EVT RegVT = VA.getLocVT();
3093 SDValue Arg = OutVals[realArgIdx];
3094 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3095 if (VA.getLocInfo() == CCValAssign::Indirect)
3096 return false;
3097 if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
3098 // f64 and vector types are split into multiple registers or
3099 // register/stack-slot combinations. The types will not match
3100 // the registers; give up on memory f64 refs until we figure
3101 // out what to do about this.
3102 if (!VA.isRegLoc())
3103 return false;
3104 if (!ArgLocs[++i].isRegLoc())
3105 return false;
3106 if (RegVT == MVT::v2f64) {
3107 if (!ArgLocs[++i].isRegLoc())
3108 return false;
3109 if (!ArgLocs[++i].isRegLoc())
3110 return false;
3111 }
3112 } else if (!VA.isRegLoc()) {
3113 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
3114 MFI, MRI, TII))
3115 return false;
3116 }
3117 }
3118 }
3119
3120 const MachineRegisterInfo &MRI = MF.getRegInfo();
3121 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3122 return false;
3123 }
3124
3125 return true;
3126}
3127
3128bool
3129ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3130 MachineFunction &MF, bool isVarArg,
3131 const SmallVectorImpl<ISD::OutputArg> &Outs,
3132 LLVMContext &Context) const {
3133 SmallVector<CCValAssign, 16> RVLocs;
3134 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3135 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3136}
3137
3138static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
3139 const SDLoc &DL, SelectionDAG &DAG) {
3140 const MachineFunction &MF = DAG.getMachineFunction();
3141 const Function &F = MF.getFunction();
3142
3143 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3144
3145 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3146 // version of the "preferred return address". These offsets affect the return
3147 // instruction if this is a return from PL1 without hypervisor extensions.
3148 // IRQ/FIQ: +4 "subs pc, lr, #4"
3149 // SWI: 0 "subs pc, lr, #0"
3150 // ABORT: +4 "subs pc, lr, #4"
3151 // UNDEF: +4/+2 "subs pc, lr, #0"
3152 // UNDEF varies depending on whether the exception came from ARM or Thumb
3153 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
3154
3155 int64_t LROffset;
3156 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3157 IntKind == "ABORT")
3158 LROffset = 4;
3159 else if (IntKind == "SWI" || IntKind == "UNDEF")
3160 LROffset = 0;
3161 else
3162 report_fatal_error("Unsupported interrupt attribute. If present, value "
3163 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3164
3165 RetOps.insert(RetOps.begin() + 1,
3166 DAG.getConstant(LROffset, DL, MVT::i32, false));
3167
3168 return DAG.getNode(ARMISD::INTRET_GLUE, DL, MVT::Other, RetOps);
3169}
3170
3171SDValue
3172ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3173 bool isVarArg,
3174 const SmallVectorImpl<ISD::OutputArg> &Outs,
3175 const SmallVectorImpl<SDValue> &OutVals,
3176 const SDLoc &dl, SelectionDAG &DAG) const {
3177 // CCValAssign - represent the assignment of the return value to a location.
3178 SmallVector<CCValAssign, 16> RVLocs;
3179
3180 // CCState - Info about the registers and stack slots.
3181 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3182 *DAG.getContext());
3183
3184 // Analyze outgoing return values.
3185 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3186
3187 SDValue Glue;
3188 SmallVector<SDValue, 4> RetOps;
3189 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3190 bool isLittleEndian = Subtarget->isLittle();
3191
3194 AFI->setReturnRegsCount(RVLocs.size());
3195
3196 // Report an error if a CMSE entry function returns a structure through the first pointer arg.
3197 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3198 // Note: using an empty SDLoc(), as the first line of the function is a
3199 // better place to report than the last line.
3202 "secure entry function would return value through pointer",
3203 SDLoc().getDebugLoc());
3204 DAG.getContext()->diagnose(Diag);
3205 }
3206
3207 // Copy the result values into the output registers.
3208 for (unsigned i = 0, realRVLocIdx = 0;
3209 i != RVLocs.size();
3210 ++i, ++realRVLocIdx) {
3211 CCValAssign &VA = RVLocs[i];
3212 assert(VA.isRegLoc() && "Can only return in registers!");
3213
3214 SDValue Arg = OutVals[realRVLocIdx];
3215 bool ReturnF16 = false;
3216
3217 if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3218 // Half-precision return values can be returned like this:
3219 //
3220 // t11 f16 = fadd ...
3221 // t12: i16 = bitcast t11
3222 // t13: i32 = zero_extend t12
3223 // t14: f32 = bitcast t13 <~~~~~~~ Arg
3224 //
3225 // to avoid code generation for bitcasts, we simply set Arg to the node
3226 // that produces the f16 value, t11 in this case.
3227 //
3228 if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3229 SDValue ZE = Arg.getOperand(0);
3230 if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3231 SDValue BC = ZE.getOperand(0);
3232 if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3233 Arg = BC.getOperand(0);
3234 ReturnF16 = true;
3235 }
3236 }
3237 }
3238 }
3239
3240 switch (VA.getLocInfo()) {
3241 default: llvm_unreachable("Unknown loc info!");
3242 case CCValAssign::Full: break;
3243 case CCValAssign::BCvt:
3244 if (!ReturnF16)
3245 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3246 break;
3247 }
3248
3249 // Mask f16 arguments if this is a CMSE nonsecure entry.
3250 auto RetVT = Outs[realRVLocIdx].ArgVT;
3251 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3252 if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3253 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3254 } else {
3255 auto LocBits = VA.getLocVT().getSizeInBits();
3256 auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3257 SDValue Mask =
3258 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3259 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3260 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3261 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3262 }
3263 }
3264
3265 if (VA.needsCustom() &&
3266 (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3267 if (VA.getLocVT() == MVT::v2f64) {
3268 // Extract the first half and return it in two registers.
3269 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3270 DAG.getConstant(0, dl, MVT::i32));
3271 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3272 DAG.getVTList(MVT::i32, MVT::i32), Half);
3273
3274 Chain =
3275 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3276 HalfGPRs.getValue(isLittleEndian ? 0 : 1), Glue);
3277 Glue = Chain.getValue(1);
3278 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3279 VA = RVLocs[++i]; // skip ahead to next loc
3280 Chain =
3281 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3282 HalfGPRs.getValue(isLittleEndian ? 1 : 0), Glue);
3283 Glue = Chain.getValue(1);
3284 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3285 VA = RVLocs[++i]; // skip ahead to next loc
3286
3287 // Extract the 2nd half and fall through to handle it as an f64 value.
3288 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3289 DAG.getConstant(1, dl, MVT::i32));
3290 }
3291 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3292 // available.
3293 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3294 DAG.getVTList(MVT::i32, MVT::i32), Arg);
3295 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3296 fmrrd.getValue(isLittleEndian ? 0 : 1), Glue);
3297 Glue = Chain.getValue(1);
3298 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3299 VA = RVLocs[++i]; // skip ahead to next loc
3300 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3301 fmrrd.getValue(isLittleEndian ? 1 : 0), Glue);
3302 } else
3303 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
3304
3305 // Guarantee that all emitted copies are glued together, so that nothing
3306 // else can be scheduled in between them.
3307 Glue = Chain.getValue(1);
3308 RetOps.push_back(DAG.getRegister(
3309 VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3310 }
3311 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3312 const MCPhysReg *I =
3313 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3314 if (I) {
3315 for (; *I; ++I) {
3316 if (ARM::GPRRegClass.contains(*I))
3317 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3318 else if (ARM::DPRRegClass.contains(*I))
3319 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3320 else
3321 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3322 }
3323 }
3324
3325 // Update chain and glue.
3326 RetOps[0] = Chain;
3327 if (Glue.getNode())
3328 RetOps.push_back(Glue);
3329
3330 // CPUs which aren't M-class use a special sequence to return from
3331 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3332 // though we use "subs pc, lr, #N").
3333 //
3334 // M-class CPUs actually use a normal return sequence with a special
3335 // (hardware-provided) value in LR, so the normal code path works.
3336 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3337 !Subtarget->isMClass()) {
3338 if (Subtarget->isThumb1Only())
3339 report_fatal_error("interrupt attribute is not supported in Thumb1");
3340 return LowerInterruptReturn(RetOps, dl, DAG);
3341 }
3342
3343 ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE :
3344 ARMISD::RET_GLUE;
3345 return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3346}
3347
3348bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3349 if (N->getNumValues() != 1)
3350 return false;
3351 if (!N->hasNUsesOfValue(1, 0))
3352 return false;
3353
3354 SDValue TCChain = Chain;
3355 SDNode *Copy = *N->use_begin();
3356 if (Copy->getOpcode() == ISD::CopyToReg) {
3357 // If the copy has a glue operand, we conservatively assume it isn't safe to
3358 // perform a tail call.
3359 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3360 return false;
3361 TCChain = Copy->getOperand(0);
3362 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3363 SDNode *VMov = Copy;
3364 // f64 returned in a pair of GPRs.
3365 SmallPtrSet<SDNode*, 2> Copies;
3366 for (SDNode *U : VMov->uses()) {
3367 if (U->getOpcode() != ISD::CopyToReg)
3368 return false;
3369 Copies.insert(U);
3370 }
3371 if (Copies.size() > 2)
3372 return false;
3373
3374 for (SDNode *U : VMov->uses()) {
3375 SDValue UseChain = U->getOperand(0);
3376 if (Copies.count(UseChain.getNode()))
3377 // Second CopyToReg
3378 Copy = U;
3379 else {
3380 // We are at the top of this chain.
3381 // If the copy has a glue operand, we conservatively assume it
3382 // isn't safe to perform a tail call.
3383 if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3384 return false;
3385 // First CopyToReg
3386 TCChain = UseChain;
3387 }
3388 }
3389 } else if (Copy->getOpcode() == ISD::BITCAST) {
3390 // f32 returned in a single GPR.
3391 if (!Copy->hasOneUse())
3392 return false;
3393 Copy = *Copy->use_begin();
3394 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3395 return false;
3396 // If the copy has a glue operand, we conservatively assume it isn't safe to
3397 // perform a tail call.
3398 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3399 return false;
3400 TCChain = Copy->getOperand(0);
3401 } else {
3402 return false;
3403 }
3404
3405 bool HasRet = false;
3406 for (const SDNode *U : Copy->uses()) {
3407 if (U->getOpcode() != ARMISD::RET_GLUE &&
3408 U->getOpcode() != ARMISD::INTRET_GLUE)
3409 return false;
3410 HasRet = true;
3411 }
3412
3413 if (!HasRet)
3414 return false;
3415
3416 Chain = TCChain;
3417 return true;
3418}
3419
3420bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3421 if (!Subtarget->supportsTailCall())
3422 return false;
3423
3424 if (!CI->isTailCall())
3425 return false;
3426
3427 return true;
3428}
3429
3430 // Trying to write a 64-bit value, so we need to split it into two 32-bit
3431 // values first and then pass the low and high parts through.
3432static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
3433 SDLoc DL(Op);
3434 SDValue WriteValue = Op->getOperand(2);
3435
3436 // This function is only supposed to be called for i64 type argument.
3437 assert(WriteValue.getValueType() == MVT::i64
3438 && "LowerWRITE_REGISTER called for non-i64 type argument.");
3439
3440 SDValue Lo, Hi;
3441 std::tie(Lo, Hi) = DAG.SplitScalar(WriteValue, DL, MVT::i32, MVT::i32);
3442 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3443 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3444}
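// For illustration (added note): a 64-bit llvm.write_register call is
// rewritten by LowerWRITE_REGISTER above into a single ISD::WRITE_REGISTER
// node that carries the low and high i32 halves as separate operands, which
// instruction selection can then map onto a two-register move such as an
// MCRR-style instruction.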
3445
3446// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3447// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3448// one of the above mentioned nodes. It has to be wrapped because otherwise
3449// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3450// be used to form addressing mode. These wrapped nodes will be selected
3451// into MOVi.
3452SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3453 SelectionDAG &DAG) const {
3454 EVT PtrVT = Op.getValueType();
3455 // FIXME there is no actual debug info here
3456 SDLoc dl(Op);
3457 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3458 SDValue Res;
3459
3460 // When generating execute-only code Constant Pools must be promoted to the
3461 // global data section. It's a bit ugly that we can't share them across basic
3462 // blocks, but this way we guarantee that execute-only behaves correctly with
3463 // position-independent addressing modes.
3464 if (Subtarget->genExecuteOnly()) {
3465 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3466 auto T = const_cast<Type*>(CP->getType());
3467 auto C = const_cast<Constant*>(CP->getConstVal());
3468 auto M = const_cast<Module*>(DAG.getMachineFunction().
3469 getFunction().getParent());
3470 auto GV = new GlobalVariable(
3471 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3474 Twine(AFI->createPICLabelUId())
3475 );
3476 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3477 dl, PtrVT);
3478 return LowerGlobalAddress(GA, DAG);
3479 }
3480
3481 // The 16-bit ADR instruction can only encode offsets that are multiples of 4,
3482 // so we need to align to at least 4 bytes when we don't have 32-bit ADR.
3483 Align CPAlign = CP->getAlign();
3484 if (Subtarget->isThumb1Only())
3485 CPAlign = std::max(CPAlign, Align(4));
3486 if (CP->isMachineConstantPoolEntry())
3487 Res =
3488 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);
3489 else
3490 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);
3491 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3492}
3493
3494unsigned ARMTargetLowering::getJumpTableEncoding() const {
3495 // If we don't have a 32-bit pc-relative branch instruction then the jump
3496 // table consists of block addresses. Usually this is inline, but for
3497 // execute-only it must be placed out-of-line.
3498 if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())
3499 return MachineJumpTableInfo::EK_BlockAddress;
3500 return MachineJumpTableInfo::EK_Inline;
3501}
3502
3503SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3504 SelectionDAG &DAG) const {
3505 MachineFunction &MF = DAG.getMachineFunction();
3506 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3507 unsigned ARMPCLabelIndex = 0;
3508 SDLoc DL(Op);
3509 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3510 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3511 SDValue CPAddr;
3512 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3513 if (!IsPositionIndependent) {
3514 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3515 } else {
3516 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3517 ARMPCLabelIndex = AFI->createPICLabelUId();
3518 ARMConstantPoolValue *CPV =
3519 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3520 ARMCP::CPBlockAddress, PCAdj);
3521 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3522 }
3523 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3524 SDValue Result = DAG.getLoad(
3525 PtrVT, DL, DAG.getEntryNode(), CPAddr,
3526 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3527 if (!IsPositionIndependent)
3528 return Result;
3529 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3530 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3531}
3532
3533/// Convert a TLS address reference into the correct sequence of loads
3534/// and calls to compute the variable's address for Darwin, and return an
3535/// SDValue containing the final node.
3536
3537/// Darwin only has one TLS scheme which must be capable of dealing with the
3538/// fully general situation, in the worst case. This means:
3539/// + "extern __thread" declaration.
3540/// + Defined in a possibly unknown dynamic library.
3541///
3542/// The general system is that each __thread variable has a [3 x i32] descriptor
3543/// which contains information used by the runtime to calculate the address. The
3544/// only part of this the compiler needs to know about is the first word, which
3545/// contains a function pointer that must be called with the address of the
3546/// entire descriptor in "r0".
3547///
3548/// Since this descriptor may be in a different unit, in general access must
3549/// proceed along the usual ARM rules. A common sequence to produce is:
3550///
3551/// movw rT1, :lower16:_var$non_lazy_ptr
3552/// movt rT1, :upper16:_var$non_lazy_ptr
3553/// ldr r0, [rT1]
3554/// ldr rT2, [r0]
3555/// blx rT2
3556/// [...address now in r0...]
3557SDValue
3558ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3559 SelectionDAG &DAG) const {
3560 assert(Subtarget->isTargetDarwin() &&
3561 "This function expects a Darwin target");
3562 SDLoc DL(Op);
3563
3564 // The first step is to get the address of the actual global symbol. This is where
3565 // the TLS descriptor lives.
3566 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3567
3568 // The first entry in the descriptor is a function pointer that we must call
3569 // to obtain the address of the variable.
3570 SDValue Chain = DAG.getEntryNode();
3571 SDValue FuncTLVGet = DAG.getLoad(
3572 MVT::i32, DL, Chain, DescAddr,
3576 Chain = FuncTLVGet.getValue(1);
3577
3579 MachineFrameInfo &MFI = F.getFrameInfo();
3580 MFI.setAdjustsStack(true);
3581
3582 // TLS calls preserve all registers except those that absolutely must be
3583 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3584 // silly).
3585 auto TRI =
3586 getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3587 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3588 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3589
3590 // Finally, we can make the call. This is just a degenerate version of a
3591 // normal AArch64 call node: r0 takes the address of the descriptor, and
3592 // returns the address of the variable in this thread.
3593 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3594 Chain =
3595 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3596 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3597 DAG.getRegisterMask(Mask), Chain.getValue(1));
3598 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3599}
3600
3601SDValue
3602ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3603 SelectionDAG &DAG) const {
3604 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3605
3606 SDValue Chain = DAG.getEntryNode();
3607 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3608 SDLoc DL(Op);
3609
3610 // Load the current TEB (thread environment block)
3611 SDValue Ops[] = {Chain,
3612 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3613 DAG.getTargetConstant(15, DL, MVT::i32),
3614 DAG.getTargetConstant(0, DL, MVT::i32),
3615 DAG.getTargetConstant(13, DL, MVT::i32),
3616 DAG.getTargetConstant(0, DL, MVT::i32),
3617 DAG.getTargetConstant(2, DL, MVT::i32)};
3618 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3619 DAG.getVTList(MVT::i32, MVT::Other), Ops);
3620
3621 SDValue TEB = CurrentTEB.getValue(0);
3622 Chain = CurrentTEB.getValue(1);
3623
3624 // Load the ThreadLocalStoragePointer from the TEB
3625 // A pointer to the TLS array is located at offset 0x2c from the TEB.
3626 SDValue TLSArray =
3627 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3628 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3629
3630 // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3631 // offset into the TLSArray.
3632
3633 // Load the TLS index from the C runtime
3634 SDValue TLSIndex =
3635 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3636 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3637 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3638
3639 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3640 DAG.getConstant(2, DL, MVT::i32));
3641 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3642 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3643 MachinePointerInfo());
3644
3645 // Get the offset of the start of the .tls section (section base)
3646 const auto *GA = cast<GlobalAddressSDNode>(Op);
3647 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3648 SDValue Offset = DAG.getLoad(
3649 PtrVT, DL, Chain,
3650 DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3651 DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3652 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3653
3654 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3655}
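// Rough shape of the code produced above (an illustrative sketch only; the
// register names are placeholders):
//   mrc p15, #0, rT, c13, c0, #2    @ current TEB
//   ldr rA, [rT, #0x2c]             @ ThreadLocalStoragePointer
//   ldr rI, =_tls_index
//   ldr rI, [rI]                    @ this module's TLS index
//   ldr rS, [rA, rI, lsl #2]        @ base of this module's TLS block
//   add rD, rS, #<SECREL offset>    @ address of the variable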
3656
3657// Lower ISD::GlobalTLSAddress using the "general dynamic" model
3658SDValue
3659ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3660 SelectionDAG &DAG) const {
3661 SDLoc dl(GA);
3662 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3663 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3664 MachineFunction &MF = DAG.getMachineFunction();
3665 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3666 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3667 ARMConstantPoolValue *CPV =
3668 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3669 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3670 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3671 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3672 Argument = DAG.getLoad(
3673 PtrVT, dl, DAG.getEntryNode(), Argument,
3674 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3675 SDValue Chain = Argument.getValue(1);
3676
3677 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3678 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3679
3680 // call __tls_get_addr.
3681 ArgListTy Args;
3682 ArgListEntry Entry;
3683 Entry.Node = Argument;
3684 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3685 Args.push_back(Entry);
3686
3687 // FIXME: is there useful debug info available here?
3688 TargetLowering::CallLoweringInfo CLI(DAG);
3689 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3690 CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3691 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3692
3693 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3694 return CallResult.first;
3695}
3696
3697// Lower ISD::GlobalTLSAddress using the "initial exec" or
3698// "local exec" model.
3699SDValue
3700ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3701 SelectionDAG &DAG,
3702 TLSModel::Model model) const {
3703 const GlobalValue *GV = GA->getGlobal();
3704 SDLoc dl(GA);
3705 SDValue Offset;
3706 SDValue Chain = DAG.getEntryNode();
3707 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3708 // Get the Thread Pointer
3709 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3710
3711 if (model == TLSModel::InitialExec) {
3712 MachineFunction &MF = DAG.getMachineFunction();
3713 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3714 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3715 // Initial exec model.
3716 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3717 ARMConstantPoolValue *CPV =
3718 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3719 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3720 true);
3721 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3722 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3723 Offset = DAG.getLoad(
3724 PtrVT, dl, Chain, Offset,
3725 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3726 Chain = Offset.getValue(1);
3727
3728 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3729 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3730
3731 Offset = DAG.getLoad(
3732 PtrVT, dl, Chain, Offset,
3733 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3734 } else {
3735 // local exec model
3736 assert(model == TLSModel::LocalExec);
3737 ARMConstantPoolValue *CPV =
3738 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3739 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3740 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3741 Offset = DAG.getLoad(
3742 PtrVT, dl, Chain, Offset,
3743 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3744 }
3745
3746 // The address of the thread local variable is the add of the thread
3747 // pointer with the offset of the variable.
3748 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3749}
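// Illustrative summary (added note): for initial-exec the offset is loaded
// from a PIC-adjusted GOT entry (ARMCP::GOTTPOFF) and then dereferenced once
// more, while for local-exec the constant pool directly holds the
// ARMCP::TPOFF offset; in both cases the result is added to the thread
// pointer obtained via ARMISD::THREAD_POINTER (typically a read of TPIDRURO)
// to form the final address.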
3750
3751SDValue
3752ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3753 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3754 if (DAG.getTarget().useEmulatedTLS())
3755 return LowerToTLSEmulatedModel(GA, DAG);
3756
3757 if (Subtarget->isTargetDarwin())
3758 return LowerGlobalTLSAddressDarwin(Op, DAG);
3759
3760 if (Subtarget->isTargetWindows())
3761 return LowerGlobalTLSAddressWindows(Op, DAG);
3762
3763 // TODO: implement the "local dynamic" model
3764 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3765 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3766
3767 switch (model) {
3768 case TLSModel::GeneralDynamic:
3769 case TLSModel::LocalDynamic:
3770 return LowerToTLSGeneralDynamicModel(GA, DAG);
3771 case TLSModel::InitialExec:
3772 case TLSModel::LocalExec:
3773 return LowerToTLSExecModels(GA, DAG, model);
3774 }
3775 llvm_unreachable("bogus TLS model");
3776}
3777
3778/// Return true if all users of V are within function F, looking through
3779/// ConstantExprs.
3780static bool allUsersAreInFunction(const Value *V, const Function *F) {
3781 SmallVector<const User*,4> Worklist(V->users());
3782 while (!Worklist.empty()) {
3783 auto *U = Worklist.pop_back_val();
3784 if (isa<ConstantExpr>(U)) {
3785 append_range(Worklist, U->users());
3786 continue;
3787 }
3788
3789 auto *I = dyn_cast<Instruction>(U);
3790 if (!I || I->getParent()->getParent() != F)
3791 return false;
3792 }
3793 return true;
3794}
3795
3796static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3797 const GlobalValue *GV, SelectionDAG &DAG,
3798 EVT PtrVT, const SDLoc &dl) {
3799 // If we're creating a pool entry for a constant global with unnamed address,
3800 // and the global is small enough, we can emit it inline into the constant pool
3801 // to save ourselves an indirection.
3802 //
3803 // This is a win if the constant is only used in one function (so it doesn't
3804 // need to be duplicated) or duplicating the constant wouldn't increase code
3805 // size (implying the constant is no larger than 4 bytes).
3806 const Function &F = DAG.getMachineFunction().getFunction();
3807
3808 // We rely on this decision to inline being idempotent and unrelated to the
3809 // use-site. We know that if we inline a variable at one use site, we'll
3810 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3811 // doesn't know about this optimization, so bail out if it's enabled;
3812 // otherwise we could decide to inline here (and thus never emit the GV)
3813 // while fast-isel generated code still requires the GV.
3816 return SDValue();
3817
3818 auto *GVar = dyn_cast<GlobalVariable>(GV);
3819 if (!GVar || !GVar->hasInitializer() ||
3820 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3821 !GVar->hasLocalLinkage())
3822 return SDValue();
3823
3824 // If we inline a value that contains relocations, we move the relocations
3825 // from .data to .text. This is not allowed in position-independent code.
3826 auto *Init = GVar->getInitializer();
3827 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3828 Init->needsDynamicRelocation())
3829 return SDValue();
3830
3831 // The constant islands pass can only really deal with alignment requests
3832 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3833 // any type wanting greater alignment requirements than 4 bytes. We also
3834 // can only promote constants that are multiples of 4 bytes in size or
3835 // are paddable to a multiple of 4. Currently we only try and pad constants
3836 // that are strings for simplicity.
3837 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3838 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3839 Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3840 unsigned RequiredPadding = 4 - (Size % 4);
3841 bool PaddingPossible =
3842 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3843 if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3844 Size == 0)
3845 return SDValue();
3846
3847 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3850
3851 // We can't bloat the constant pool too much, or else the ConstantIslands pass
3852 // may fail to converge. If we haven't promoted this global yet (it may have
3853 // multiple uses), and promoting it would increase the constant pool size
3854 // (Size > 4), ensure we have space to do so up to MaxTotal.
3855 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3856 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3857 ConstpoolPromotionMaxTotal)
3858 return SDValue();
3859
3860 // This is only valid if all users are in a single function; we can't clone
3861 // the constant in general. The LLVM IR unnamed_addr allows merging
3862 // constants, but not cloning them.
3863 //
3864 // We could potentially allow cloning if we could prove all uses of the
3865 // constant in the current function don't care about the address, like
3866 // printf format strings. But that isn't implemented for now.
3867 if (!allUsersAreInFunction(GVar, &F))
3868 return SDValue();
3869
3870 // We're going to inline this global. Pad it out if needed.
3871 if (RequiredPadding != 4) {
3872 StringRef S = CDAInit->getAsString();
3873
3874 SmallVector<uint8_t,16> V(S.size());
3875 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3876 while (RequiredPadding--)
3877 V.push_back(0);
3878 Init = ConstantDataArray::get(*DAG.getContext(), V);
3879 }
3880
3881 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3882 SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
3883 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3886 PaddedSize - 4);
3887 }
3888 ++NumConstpoolPromoted;
3889 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3890}
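// Editorial note (not part of the original source): a worked example of the
// padding arithmetic above. For a local constant string of 13 bytes,
// Size % 4 == 1, so RequiredPadding == 3 and PaddedSize == 16; the string is
// zero-padded before being placed in the constant pool. A 12-byte constant has
// RequiredPadding == 4, which is the "already a multiple of 4" case and adds
// no padding. Anything larger than ConstpoolPromotionMaxSize is rejected by
// the size check earlier in this function.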
3891
3893 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3894 if (!(GV = GA->getAliaseeObject()))
3895 return false;
3896 if (const auto *V = dyn_cast<GlobalVariable>(GV))
3897 return V->isConstant();
3898 return isa<Function>(GV);
3899}
3900
3901SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3902 SelectionDAG &DAG) const {
3903 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3904 default: llvm_unreachable("unknown object format");
3905 case Triple::COFF:
3906 return LowerGlobalAddressWindows(Op, DAG);
3907 case Triple::ELF:
3908 return LowerGlobalAddressELF(Op, DAG);
3909 case Triple::MachO:
3910 return LowerGlobalAddressDarwin(Op, DAG);
3911 }
3912}
3913
3914SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3915 SelectionDAG &DAG) const {
3916 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3917 SDLoc dl(Op);
3918 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3919 bool IsRO = isReadOnly(GV);
3920
3921 // Call promoteToConstantPool only if we are not generating an execute-only (XO) text section.
3922 if (GV->isDSOLocal() && !Subtarget->genExecuteOnly())
3923 if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3924 return V;
3925
3926 if (isPositionIndependent()) {
3928 GV, dl, PtrVT, 0, GV->isDSOLocal() ? 0 : ARMII::MO_GOT);
3929 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3930 if (!GV->isDSOLocal())
3931 Result =
3932 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3934 return Result;
3935 } else if (Subtarget->isROPI() && IsRO) {
3936 // PC-relative.
3937 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3938 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3939 return Result;
3940 } else if (Subtarget->isRWPI() && !IsRO) {
3941 // SB-relative.
3942 SDValue RelAddr;
3943 if (Subtarget->useMovt()) {
3944 ++NumMovwMovt;
3945 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3946 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3947 } else { // use literal pool for address constant
3950 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3951 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3952 RelAddr = DAG.getLoad(
3953 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3955 }
3956 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3957 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3958 return Result;
3959 }
3960
3961 // If we have T2 ops, we can materialize the address directly via movt/movw
3962 // pair. This is always cheaper. If we need to generate Execute Only code, and we
3963 // only have Thumb1 available, we can't use a constant pool and are forced to
3964 // use immediate relocations.
3965 if (Subtarget->useMovt() || Subtarget->genExecuteOnly()) {
3966 if (Subtarget->useMovt())
3967 ++NumMovwMovt;
3968 // FIXME: Once remat is capable of dealing with instructions with register
3969 // operands, expand this into two nodes.
3970 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3971 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3972 } else {
3973 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
3974 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3975 return DAG.getLoad(
3976 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3978 }
3979}
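// Editorial sketch (not in the original source): typical code produced by the
// non-PIC paths above for a DSO-local global 'g', shown as assembly purely for
// illustration. With movw/movt available:
//   movw r0, :lower16:g
//   movt r0, :upper16:g
// Without movt (and no execute-only requirement), a literal-pool load is used
// instead:
//   ldr  r0, .LCPI0_0        @ .LCPI0_0 holds the address of g
// The PIC path wraps the address in WrapperPIC and, for non-DSO-local globals,
// loads the final address through the GOT.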
3980
3981SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3982 SelectionDAG &DAG) const {
3983 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3984 "ROPI/RWPI not currently supported for Darwin");
3985 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3986 SDLoc dl(Op);
3987 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3988
3989 if (Subtarget->useMovt())
3990 ++NumMovwMovt;
3991
3992 // FIXME: Once remat is capable of dealing with instructions with register
3993 // operands, expand this into multiple nodes
3994 unsigned Wrapper =
3996
3997 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3998 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3999
4000 if (Subtarget->isGVIndirectSymbol(GV))
4001 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
4003 return Result;
4004}
4005
4006SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
4007 SelectionDAG &DAG) const {
4008 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
4009 assert(Subtarget->useMovt() &&
4010 "Windows on ARM expects to use movw/movt");
4011 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4012 "ROPI/RWPI not currently supported for Windows");
4013
4015 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4016 ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
4017 if (GV->hasDLLImportStorageClass())
4018 TargetFlags = ARMII::MO_DLLIMPORT;
4019 else if (!TM.shouldAssumeDSOLocal(GV))
4020 TargetFlags = ARMII::MO_COFFSTUB;
4021 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4023 SDLoc DL(Op);
4024
4025 ++NumMovwMovt;
4026
4027 // FIXME: Once remat is capable of dealing with instructions with register
4028 // operands, expand this into two nodes.
4029 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
4030 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
4031 TargetFlags));
4032 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
4033 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4035 return Result;
4036}
4037
4038SDValue
4039ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
4040 SDLoc dl(Op);
4041 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
4042 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
4043 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
4044 Op.getOperand(1), Val);
4045}
4046
4047SDValue
4048ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
4049 SDLoc dl(Op);
4050 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
4051 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
4052}
4053
4054SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
4055 SelectionDAG &DAG) const {
4056 SDLoc dl(Op);
4057 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
4058 Op.getOperand(0));
4059}
4060
4061SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
4062 SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
4063 unsigned IntNo =
4064 Op.getConstantOperandVal(Op.getOperand(0).getValueType() == MVT::Other);
4065 switch (IntNo) {
4066 default:
4067 return SDValue(); // Don't custom lower most intrinsics.
4068 case Intrinsic::arm_gnu_eabi_mcount: {
4070 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4071 SDLoc dl(Op);
4072 SDValue Chain = Op.getOperand(0);
4073 // call "\01__gnu_mcount_nc"
4074 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
4075 const uint32_t *Mask =
4077 assert(Mask && "Missing call preserved mask for calling convention");
4078 // Mark LR an implicit live-in.
4079 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4080 SDValue ReturnAddress =
4081 DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
4082 constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
4083 SDValue Callee =
4084 DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
4086 if (Subtarget->isThumb())
4087 return SDValue(
4088 DAG.getMachineNode(
4089 ARM::tBL_PUSHLR, dl, ResultTys,
4090 {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
4091 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
4092 0);
4093 return SDValue(
4094 DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
4095 {ReturnAddress, Callee, RegisterMask, Chain}),
4096 0);
4097 }
4098 }
4099}
4100
4101SDValue
4102ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
4103 const ARMSubtarget *Subtarget) const {
4104 unsigned IntNo = Op.getConstantOperandVal(0);
4105 SDLoc dl(Op);
4106 switch (IntNo) {
4107 default: return SDValue(); // Don't custom lower most intrinsics.
4108 case Intrinsic::thread_pointer: {
4109 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4110 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
4111 }
4112 case Intrinsic::arm_cls: {
4113 const SDValue &Operand = Op.getOperand(1);
4114 const EVT VTy = Op.getValueType();
4115 SDValue SRA =
4116 DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
4117 SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
4118 SDValue SHL =
4119 DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
4120 SDValue OR =
4121 DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
4122 SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
4123 return Result;
4124 }
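      // Editorial worked example (not in the original source): for the CLS
      // expansion above with x = 0xFFFFFFF5 (-11):
      //   SRA(x, 31) = 0xFFFFFFFF, XOR with x  = 0x0000000A
      //   SHL by 1, then OR 1                  = 0x00000015
      //   CTLZ(0x00000015)                     = 27
      // which matches CLS(-11) = 27 leading sign bits after the sign bit.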
4125 case Intrinsic::arm_cls64: {
4126 // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
4127 // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
4128 const SDValue &Operand = Op.getOperand(1);
4129 const EVT VTy = Op.getValueType();
4130 SDValue Lo, Hi;
4131 std::tie(Lo, Hi) = DAG.SplitScalar(Operand, dl, VTy, VTy);
4132 SDValue Constant0 = DAG.getConstant(0, dl, VTy);
4133 SDValue Constant1 = DAG.getConstant(1, dl, VTy);
4134 SDValue Constant31 = DAG.getConstant(31, dl, VTy);
4135 SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
4136 SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
4137 SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
4138 SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
4139 SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
4140 SDValue CheckLo =
4141 DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
4142 SDValue HiIsZero =
4143 DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
4144 SDValue AdjustedLo =
4145 DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
4146 SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
4147 SDValue Result =
4148 DAG.getSelect(dl, VTy, CheckLo,
4149 DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
4150 return Result;
4151 }
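      // Editorial worked example (not in the original source): for the 64-bit
      // expansion above with x = 0xFFFFFFFFFFFFFF00, CLS of the high word is
      // 31 (all sign bits), so the result comes from the second branch:
      // 31 + CTLZ(~lo) = 31 + CTLZ(0x000000FF) = 31 + 24 = 55, i.e. 55
      // leading sign bits after the sign bit.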
4152 case Intrinsic::eh_sjlj_lsda: {
4155 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
4156 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4157 SDValue CPAddr;
4158 bool IsPositionIndependent = isPositionIndependent();
4159 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
4161 ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
4162 ARMCP::CPLSDA, PCAdj);
4163 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
4164 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4165 SDValue Result = DAG.getLoad(
4166 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4168
4169 if (IsPositionIndependent) {
4170 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
4171 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
4172 }
4173 return Result;
4174 }
4175 case Intrinsic::arm_neon_vabs:
4176 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
4177 Op.getOperand(1));
4178 case Intrinsic::arm_neon_vmulls:
4179 case Intrinsic::arm_neon_vmullu: {
4180 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
4182 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4183 Op.getOperand(1), Op.getOperand(2));
4184 }
4185 case Intrinsic::arm_neon_vminnm:
4186 case Intrinsic::arm_neon_vmaxnm: {
4187 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
4189 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4190 Op.getOperand(1), Op.getOperand(2));
4191 }
4192 case Intrinsic::arm_neon_vminu:
4193 case Intrinsic::arm_neon_vmaxu: {
4194 if (Op.getValueType().isFloatingPoint())
4195 return SDValue();
4196 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
4197 ? ISD::UMIN : ISD::UMAX;
4198 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4199 Op.getOperand(1), Op.getOperand(2));
4200 }
4201 case Intrinsic::arm_neon_vmins:
4202 case Intrinsic::arm_neon_vmaxs: {
4203 // v{min,max}s is overloaded between signed integers and floats.
4204 if (!Op.getValueType().isFloatingPoint()) {
4205 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4206 ? ISD::SMIN : ISD::SMAX;
4207 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4208 Op.getOperand(1), Op.getOperand(2));
4209 }
4210 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4212 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4213 Op.getOperand(1), Op.getOperand(2));
4214 }
4215 case Intrinsic::arm_neon_vtbl1:
4216 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
4217 Op.getOperand(1), Op.getOperand(2));
4218 case Intrinsic::arm_neon_vtbl2:
4219 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
4220 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4221 case Intrinsic::arm_mve_pred_i2v:
4222 case Intrinsic::arm_mve_pred_v2i:
4223 return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
4224 Op.getOperand(1));
4225 case Intrinsic::arm_mve_vreinterpretq:
4226 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
4227 Op.getOperand(1));
4228 case Intrinsic::arm_mve_lsll:
4229 return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
4230 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4231 case Intrinsic::arm_mve_asrl:
4232 return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
4233 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4234 }
4235}
4236
4238 const ARMSubtarget *Subtarget) {
4239 SDLoc dl(Op);
4240 auto SSID = static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4241 if (SSID == SyncScope::SingleThread)
4242 return Op;
4243
4244 if (!Subtarget->hasDataBarrier()) {
4245 // Some ARMv6 CPUs can support data barriers with an mcr instruction.
4246 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
4247 // here.
4248 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4249 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
4250 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
4251 DAG.getConstant(0, dl, MVT::i32));
4252 }
4253
4254 AtomicOrdering Ord =
4255 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4257 if (Subtarget->isMClass()) {
4258 // Only a full system barrier exists in the M-class architectures.
4260 } else if (Subtarget->preferISHSTBarriers() &&
4261 Ord == AtomicOrdering::Release) {
4262 // Swift happens to implement ISHST barriers in a way that's compatible with
4263 // Release semantics but weaker than ISH so we'd be fools not to use
4264 // it. Beware: other processors probably don't!
4266 }
4267
4268 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
4269 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
4270 DAG.getConstant(Domain, dl, MVT::i32));
4271}
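// Editorial note (not part of the original source): the net effect of the
// lowering above is a DMB intrinsic whose barrier domain depends on the
// subtarget: M-class cores only have the full-system barrier, Swift-like
// cores can use the store-only inner-shareable barrier for release fences,
// and other cores get the default inner-shareable barrier. The exact ARM_MB
// enumerators are elided in this listing, so the mapping is described only
// informally here.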
4272
4274 const ARMSubtarget *Subtarget) {
4275 // ARM pre v5TE and Thumb1 does not have preload instructions.
4276 if (!(Subtarget->isThumb2() ||
4277 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4278 // Just preserve the chain.
4279 return Op.getOperand(0);
4280
4281 SDLoc dl(Op);
4282 unsigned isRead = ~Op.getConstantOperandVal(2) & 1;
4283 if (!isRead &&
4284 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4285 // ARMv7 with MP extension has PLDW.
4286 return Op.getOperand(0);
4287
4288 unsigned isData = Op.getConstantOperandVal(4);
4289 if (Subtarget->isThumb()) {
4290 // Invert the bits.
4291 isRead = ~isRead & 1;
4292 isData = ~isData & 1;
4293 }
4294
4295 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
4296 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
4297 DAG.getConstant(isData, dl, MVT::i32));
4298}
4299
4302 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
4303
4304 // vastart just stores the address of the VarArgsFrameIndex slot into the
4305 // memory location argument.
4306 SDLoc dl(Op);
4308 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4309 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4310 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4311 MachinePointerInfo(SV));
4312}
4313
4314SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
4315 CCValAssign &NextVA,
4316 SDValue &Root,
4317 SelectionDAG &DAG,
4318 const SDLoc &dl) const {
4321
4322 const TargetRegisterClass *RC;
4323 if (AFI->isThumb1OnlyFunction())
4324 RC = &ARM::tGPRRegClass;
4325 else
4326 RC = &ARM::GPRRegClass;
4327
4328 // Transform the arguments stored in physical registers into virtual ones.
4329 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4330 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4331
4332 SDValue ArgValue2;
4333 if (NextVA.isMemLoc()) {
4334 MachineFrameInfo &MFI = MF.getFrameInfo();
4335 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
4336
4337 // Create load node to retrieve arguments from the stack.
4338 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4339 ArgValue2 = DAG.getLoad(
4340 MVT::i32, dl, Root, FIN,
4342 } else {
4343 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
4344 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4345 }
4346 if (!Subtarget->isLittle())
4347 std::swap (ArgValue, ArgValue2);
4348 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
4349}
4350
4351// The remaining GPRs hold either the beginning of variable-argument
4352// data, or the beginning of an aggregate passed by value (usually
4353// byval). Either way, we allocate stack slots adjacent to the data
4354// provided by our caller, and store the unallocated registers there.
4355// If this is a variadic function, the va_list pointer will begin with
4356// these values; otherwise, this reassembles a (byval) structure that
4357// was split between registers and memory.
4358 // Return: the frame index that the registers were stored into.
4359int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
4360 const SDLoc &dl, SDValue &Chain,
4361 const Value *OrigArg,
4362 unsigned InRegsParamRecordIdx,
4363 int ArgOffset, unsigned ArgSize) const {
4364 // Currently, two use-cases are possible:
4365 // Case #1. Non-var-args function, and we meet the first byval parameter.
4366 // Set up the first unallocated register as the first byval register;
4367 // eat all remaining registers
4368 // (these two actions are performed by the HandleByVal method).
4369 // Then, here, we initialize the stack frame with
4370 // "store-reg" instructions.
4371 // Case #2. Var-args function that doesn't contain byval parameters.
4372 // The same: eat all remaining unallocated registers and
4373 // initialize the stack frame.
4374
4376 MachineFrameInfo &MFI = MF.getFrameInfo();
4378 unsigned RBegin, REnd;
4379 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
4380 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
4381 } else {
4382 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4383 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
4384 REnd = ARM::R4;
4385 }
4386
4387 if (REnd != RBegin)
4388 ArgOffset = -4 * (ARM::R4 - RBegin);
4389
4390 auto PtrVT = getPointerTy(DAG.getDataLayout());
4391 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
4392 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
4393
4395 const TargetRegisterClass *RC =
4396 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4397
4398 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
4399 Register VReg = MF.addLiveIn(Reg, RC);
4400 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4401 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4402 MachinePointerInfo(OrigArg, 4 * i));
4403 MemOps.push_back(Store);
4404 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
4405 }
4406
4407 if (!MemOps.empty())
4408 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4409 return FrameIndex;
4410}
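// Editorial worked example (not in the original source): for a byval argument
// whose first 8 bytes were assigned to r2 and r3 (RBegin == R2, REnd == R4),
// ArgOffset becomes -4 * (R4 - R2) = -8. A fixed object of the full byval size
// is created at that offset, just below the stack-passed portion, and r2/r3
// are stored into its first 8 bytes so the whole aggregate ends up contiguous
// in memory.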
4411
4412 // Set up the stack frame that the va_list pointer will start from.
4413void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
4414 const SDLoc &dl, SDValue &Chain,
4415 unsigned ArgOffset,
4416 unsigned TotalArgRegsSaveSize,
4417 bool ForceMutable) const {
4420
4421 // Try to store any remaining integer argument regs
4422 // to their spots on the stack so that they may be loaded by dereferencing
4423 // the result of va_next.
4424 // If there are no regs to be stored, just point the address after the last
4425 // argument passed via the stack.
4426 int FrameIndex = StoreByValRegs(
4427 CCInfo, DAG, dl, Chain, nullptr, CCInfo.getInRegsParamsCount(),
4428 CCInfo.getStackSize(), std::max(4U, TotalArgRegsSaveSize));
4429 AFI->setVarArgsFrameIndex(FrameIndex);
4430}
4431
4432bool ARMTargetLowering::splitValueIntoRegisterParts(
4433 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4434 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
4435 EVT ValueVT = Val.getValueType();
4436 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4437 unsigned ValueBits = ValueVT.getSizeInBits();
4438 unsigned PartBits = PartVT.getSizeInBits();
4439 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
4440 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
4441 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
4442 Parts[0] = Val;
4443 return true;
4444 }
4445 return false;
4446}
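// Editorial sketch (not part of the original source): the split above models
// passing a half-precision value in the low 16 bits of an f32 part. In plain
// C++ terms (hypothetical helper, for illustration only):
//
//   static uint32_t packHalfInFloatBits(uint16_t HalfBits) {
//     // bitcast f16 -> i16, any-extend to i32, bitcast i32 -> f32 container;
//     // the upper 16 bits of the container are unspecified (zero is one
//     // valid choice).
//     return static_cast<uint32_t>(HalfBits);
//   }
//
// joinRegisterPartsIntoValue below performs the inverse: truncate the i32
// container back to i16 and bitcast it to f16/bf16.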
4447
4448SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4449 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
4450 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
4451 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4452 unsigned ValueBits = ValueVT.getSizeInBits();
4453 unsigned PartBits = PartVT.getSizeInBits();
4454 SDValue Val = Parts[0];
4455
4456 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
4457 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
4458 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
4459 return Val;
4460 }
4461 return SDValue();
4462}
4463
4464SDValue ARMTargetLowering::LowerFormalArguments(
4465 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4466 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4467 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4469 MachineFrameInfo &MFI = MF.getFrameInfo();
4470
4472
4473 // Assign locations to all of the incoming arguments.
4475 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4476 *DAG.getContext());
4477 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
4478
4479 SmallVector<SDValue, 16> ArgValues;
4480 SDValue ArgValue;
4482 unsigned CurArgIdx = 0;
4483
4484 // Initially ArgRegsSaveSize is zero.
4485 // Then we increase this value each time we meet a byval parameter.
4486 // We also increase this value in the case of a varargs function.
4487 AFI->setArgRegsSaveSize(0);
4488
4489 // Calculate the amount of stack space that we need to allocate to store
4490 // byval and variadic arguments that are passed in registers.
4491 // We need to know this before we allocate the first byval or variadic
4492 // argument, as they will be allocated a stack slot below the CFA (Canonical
4493 // Frame Address, the stack pointer at entry to the function).
4494 unsigned ArgRegBegin = ARM::R4;
4495 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4496 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
4497 break;
4498
4499 CCValAssign &VA = ArgLocs[i];
4500 unsigned Index = VA.getValNo();
4501 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
4502 if (!Flags.isByVal())
4503 continue;
4504
4505 assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4506 unsigned RBegin, REnd;
4507 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
4508 ArgRegBegin = std::min(ArgRegBegin, RBegin);
4509
4510 CCInfo.nextInRegsParam();
4511 }
4512 CCInfo.rewindByValRegsInfo();
4513
4514 int lastInsIndex = -1;
4515 if (isVarArg && MFI.hasVAStart()) {
4516 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4517 if (RegIdx != std::size(GPRArgRegs))
4518 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
4519 }
4520
4521 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4522 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4523 auto PtrVT = getPointerTy(DAG.getDataLayout());
4524
4525 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4526 CCValAssign &VA = ArgLocs[i];
4527 if (Ins[VA.getValNo()].isOrigArg()) {
4528 std::advance(CurOrigArg,
4529 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4530 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
4531 }
4532 // Arguments stored in registers.
4533 if (VA.isRegLoc()) {
4534 EVT RegVT = VA.getLocVT();
4535
4536 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
4537 // f64 and vector types are split up into multiple registers or
4538 // combinations of registers and stack slots.
4539 SDValue ArgValue1 =
4540 GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4541 VA = ArgLocs[++i]; // skip ahead to next loc
4542 SDValue ArgValue2;
4543 if (VA.isMemLoc()) {
4544 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
4545 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4546 ArgValue2 = DAG.getLoad(
4547 MVT::f64, dl, Chain, FIN,
4549 } else {
4550 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4551 }
4552 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
4553 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4554 ArgValue1, DAG.getIntPtrConstant(0, dl));
4555 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4556 ArgValue2, DAG.getIntPtrConstant(1, dl));
4557 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
4558 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4559 } else {
4560 const TargetRegisterClass *RC;
4561
4562 if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4563 RC = &ARM::HPRRegClass;
4564 else if (RegVT == MVT::f32)
4565 RC = &ARM::SPRRegClass;
4566 else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
4567 RegVT == MVT::v4bf16)
4568 RC = &ARM::DPRRegClass;
4569 else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
4570 RegVT == MVT::v8bf16)
4571 RC = &ARM::QPRRegClass;
4572 else if (RegVT == MVT::i32)
4573 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4574 : &ARM::GPRRegClass;
4575 else
4576 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4577
4578 // Transform the arguments in physical registers into virtual ones.
4579 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4580 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4581
4582 // If this value is passed in r0 and has the returned attribute (e.g.
4583 // C++ 'structors), record this fact for later use.
4584 if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4585 AFI->setPreservesR0();
4586 }
4587 }
4588
4589 // If this is an 8 or 16-bit value, it is really passed promoted
4590 // to 32 bits. Insert an assert[sz]ext to capture this, then
4591 // truncate to the right size.
4592 switch (VA.getLocInfo()) {
4593 default: llvm_unreachable("Unknown loc info!");
4594 case CCValAssign::Full: break;
4595 case CCValAssign::BCvt:
4596 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4597 break;
4598 case CCValAssign::SExt:
4599 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
4600 DAG.getValueType(VA.getValVT()));
4601 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4602 break;
4603 case CCValAssign::ZExt:
4604 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
4605 DAG.getValueType(VA.getValVT()));
4606 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4607 break;
4608 }
4609
4610 // f16 arguments have their size extended to 4 bytes and are passed as if they
4611 // had been copied to the LSBs of a 32-bit register.
4612 // For that, the value is passed extended to i32 (soft ABI) or to f32 (hard ABI).
4613 if (VA.needsCustom() &&
4614 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
4615 ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);
4616
4617 InVals.push_back(ArgValue);
4618 } else { // VA.isRegLoc()
4619 // Only arguments passed on the stack should make it here.
4620 assert(VA.isMemLoc());
4621 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4622
4623 int index = VA.getValNo();
4624
4625 // Some Ins[] entries become multiple ArgLoc[] entries.
4626 // Process them only once.
4627 if (index != lastInsIndex)
4628 {
4629 ISD::ArgFlagsTy Flags = Ins[index].Flags;
4630 // FIXME: For now, all byval parameter objects are marked mutable.
4631 // This can be changed with more analysis.
4632 // In the case of tail call optimization, mark all arguments mutable,
4633 // since they could be overwritten by the lowering of arguments in case of
4634 // a tail call.
4635 if (Flags.isByVal()) {
4636 assert(Ins[index].isOrigArg() &&
4637 "Byval arguments cannot be implicit");
4638 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4639
4640 int FrameIndex = StoreByValRegs(
4641 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4642 VA.getLocMemOffset(), Flags.getByValSize());
4643 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4644 CCInfo.nextInRegsParam();
4645 } else {
4646 unsigned FIOffset = VA.getLocMemOffset();
4647 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4648 FIOffset, true);
4649
4650 // Create load nodes to retrieve arguments from the stack.
4651 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4652 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
4654 DAG.getMachineFunction(), FI)));
4655 }
4656 lastInsIndex = index;
4657 }
4658 }
4659 }
4660
4661 // varargs
4662 if (isVarArg && MFI.hasVAStart()) {
4663 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getStackSize(),
4664 TotalArgRegsSaveSize);
4665 if (AFI->isCmseNSEntryFunction()) {
4668 "secure entry function must not be variadic", dl.getDebugLoc());
4669 DAG.getContext()->diagnose(Diag);
4670 }
4671 }
4672
4673 unsigned StackArgSize = CCInfo.getStackSize();
4674 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4675 if (canGuaranteeTCO(CallConv, TailCallOpt)) {
4676 // The only way to guarantee a tail call is if the callee restores its
4677 // argument area, but it must also keep the stack aligned when doing so.
4678 const DataLayout &DL = DAG.getDataLayout();
4679 StackArgSize = alignTo(StackArgSize, DL.getStackAlignment());
4680
4681 AFI->setArgumentStackToRestore(StackArgSize);
4682 }
4683 AFI->setArgumentStackSize(StackArgSize);
4684
4685 if (CCInfo.getStackSize() > 0 && AFI->isCmseNSEntryFunction()) {
4688 "secure entry function requires arguments on stack", dl.getDebugLoc());
4689 DAG.getContext()->diagnose(Diag);
4690 }
4691
4692 return Chain;
4693}
4694
4695/// isFloatingPointZero - Return true if this is +0.0.
4697 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
4698 return CFP->getValueAPF().isPosZero();
4699 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
4700 // Maybe this has already been legalized into the constant pool?
4701 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
4702 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
4703 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
4704 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4705 return CFP->getValueAPF().isPosZero();
4706 }
4707 } else if (Op->getOpcode() == ISD::BITCAST &&
4708 Op->getValueType(0) == MVT::f64) {
4709 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
4710 // created by LowerConstantFP().
4711 SDValue BitcastOp = Op->getOperand(0);
4712 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4713 isNullConstant(BitcastOp->getOperand(0)))
4714 return true;
4715 }
4716 return false;
4717}
4718
4719/// Returns the appropriate ARM CMP (cmp) and the corresponding condition code for
4720/// the given operands.
4721SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4722 SDValue &ARMcc, SelectionDAG &DAG,
4723 const SDLoc &dl) const {
4724 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4725 unsigned C = RHSC->getZExtValue();
4726 if (!isLegalICmpImmediate((int32_t)C)) {
4727 // Constant does not fit, try adjusting it by one.
4728 switch (CC) {
4729 default: break;
4730 case ISD::SETLT:
4731 case ISD::SETGE:
4732 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4733 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
4734 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4735 }
4736 break;
4737 case ISD::SETULT:
4738 case ISD::SETUGE:
4739 if (C != 0 && isLegalICmpImmediate(C-1)) {
4740 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
4741 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4742 }
4743 break;
4744 case ISD::SETLE:
4745 case ISD::SETGT:
4746 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4747 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
4748 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4749 }
4750 break;
4751 case ISD::SETULE:
4752 case ISD::SETUGT:
4753 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4754 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4755 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4756 }
4757 break;
4758 }
4759 }
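      // Editorial worked example (not in the original source): "x u< 0x1001"
      // is rewritten as "x u<= 0x1000", because 0x1001 is not a legal ARM
      // modified immediate while 0x1000 is, saving a constant-materialization
      // instruction before the cmp.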
4760 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4762 // In ARM and Thumb-2, the compare instructions can shift their second
4763 // operand.
4765 std::swap(LHS, RHS);
4766 }
4767
4768 // Thumb1 has very limited immediate modes, so turning an "and" into a
4769 // shift can save multiple instructions.
4770 //
4771 // If we have (x & C1), and C1 is an appropriate mask, we can transform it
4772 // into "((x << n) >> n)". But that isn't necessarily profitable on its
4773 // own. If it's the operand to an unsigned comparison with an immediate,
4774 // we can eliminate one of the shifts: we transform
4775 // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
4776 //
4777 // We avoid transforming cases which aren't profitable due to encoding
4778 // details:
4779 //
4780 // 1. C2 fits into the immediate field of a cmp, and the transformed version
4781 // would not; in that case, we're essentially trading one immediate load for
4782 // another.
4783 // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
4784 // 3. C2 is zero; we have other code for this special case.
4785 //
4786 // FIXME: Figure out profitability for Thumb2; we usually can't save an
4787 // instruction, since the AND is always one instruction anyway, but we could
4788 // use narrow instructions in some cases.
4789 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4790 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4791 LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
4792 !isSignedIntSetCC(CC)) {
4793 unsigned Mask = LHS.getConstantOperandVal(1);
4794 auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
4795 uint64_t RHSV = RHSC->getZExtValue();
4796 if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4797 unsigned ShiftBits = llvm::countl_zero(Mask);
4798 if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4799 SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
4800 LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
4801 RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
4802 }
4803 }
4804 }
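      // Editorial worked example (not in the original source): on Thumb1,
      // "(x & 0x1ff) == 300" becomes "(x << 23) == (300 << 23)". The 0x1ff
      // mask (which would itself need to be materialized) disappears in
      // favour of a single shift, and the comparison constant is simply
      // shifted by the same amount.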
4805
4806 // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
4807 // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
4808 // way a cmp would.
4809 // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
4810 // some tweaks to the heuristics for the previous and->shift transform.
4811 // FIXME: Optimize cases where the LHS isn't a shift.
4812 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4813 isa<ConstantSDNode>(RHS) && RHS->getAsZExtVal() == 0x80000000U &&
4814 CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4815 LHS.getConstantOperandVal(1) < 31) {
4816 unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;
4817 SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
4818 DAG.getVTList(MVT::i32, MVT::i32),
4819 LHS.getOperand(0),
4820 DAG.getConstant(ShiftAmt, dl, MVT::i32));
4821 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
4822 Shift.getValue(1), SDValue());
4823 ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
4824 return Chain.getValue(1);
4825 }
4826
4828
4829 // If the RHS is a constant zero then the V (overflow) flag will never be
4830 // set. This can allow us to simplify GE to PL or LT to MI, which can be
4831 // simpler for other passes (like the peephole optimiser) to deal with.
4832 if (isNullConstant(RHS)) {
4833 switch (CondCode) {
4834 default: break;
4835 case ARMCC::GE:
4837 break;
4838 case ARMCC::LT:
4840 break;
4841 }
4842 }
4843
4844 ARMISD::NodeType CompareType;
4845 switch (CondCode) {
4846 default:
4847 CompareType = ARMISD::CMP;
4848 break;
4849 case ARMCC::EQ:
4850 case ARMCC::NE:
4851 // Uses only Z Flag
4852 CompareType = ARMISD::CMPZ;
4853 break;
4854 }
4855 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4856 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
4857}
4858
4859/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4860SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4861 SelectionDAG &DAG, const SDLoc &dl,
4862 bool Signaling) const {
4863 assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4864 SDValue Cmp;
4865 if (!isFloatingPointZero(RHS))
4866 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP,
4867 dl, MVT::Glue, LHS, RHS);
4868 else
4869 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0,
4870 dl, MVT::Glue, LHS);
4871 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
4872}
4873
4874/// duplicateCmp - Glue values can have only one use, so this function
4875/// duplicates a comparison node.
4876SDValue
4877ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
4878 unsigned Opc = Cmp.getOpcode();
4879 SDLoc DL(Cmp);
4880 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
4881 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4882
4883 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
4884 Cmp = Cmp.getOperand(0);
4885 Opc = Cmp.getOpcode();
4886 if (Opc == ARMISD::CMPFP)
4887 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4888 else {
4889 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
4890 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
4891 }
4892 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
4893}
4894
4895// This function returns three things: the arithmetic computation itself
4896// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
4897// comparison and the condition code define the case in which the arithmetic
4898// computation *does not* overflow.
4899std::pair<SDValue, SDValue>
4900ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
4901 SDValue &ARMcc) const {
4902 assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
4903
4904 SDValue Value, OverflowCmp;
4905 SDValue LHS = Op.getOperand(0);
4906 SDValue RHS = Op.getOperand(1);
4907 SDLoc dl(Op);
4908
4909 // FIXME: We are currently always generating CMPs because we don't support
4910 // generating CMN through the backend. This is not as good as the natural
4911 // CMP case because it causes a register dependency and cannot be folded
4912 // later.
4913
4914 switch (Op.getOpcode()) {
4915 default:
4916 llvm_unreachable("Unknown overflow instruction!");
4917 case ISD::SADDO:
4918 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4919 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
4920 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4921 break;
4922 case ISD::UADDO:
4923 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4924 // We use ADDC here to correspond to its use in LowerUnsignedALUO.
4925 // We do not use it in the USUBO case as Value may not be used.
4926 Value = DAG.getNode(ARMISD::ADDC, dl,
4927 DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
4928 .getValue(0);
4929 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4930 break;
4931 case ISD::SSUBO:
4932 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4933 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4934 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4935 break;
4936 case ISD::USUBO:
4937 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4938 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4939 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4940 break;
4941 case ISD::UMULO:
4942 // We generate a UMUL_LOHI and then check if the high word is 0.
4943 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4944 Value = DAG.getNode(ISD::UMUL_LOHI, dl,
4945 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4946 LHS, RHS);
4947 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4948 DAG.getConstant(0, dl, MVT::i32));
4949 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4950 break;
4951 case ISD::SMULO:
4952 // We generate a SMUL_LOHI and then check if all the bits of the high word
4953 // are the same as the sign bit of the low word.
4954 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4955 Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4956 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4957 LHS, RHS);
4958 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4959 DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4960 Value.getValue(0),
4961 DAG.getConstant(31, dl, MVT::i32)));
4962 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4963 break;
4964 } // switch (...)
4965
4966 return std::make_pair(Value, OverflowCmp);
4967}
4968
4969SDValue
4970ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4971 // Let legalize expand this if it isn't a legal type yet.
4972 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4973 return SDValue();
4974
4975 SDValue Value, OverflowCmp;
4976 SDValue ARMcc;
4977 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
4978 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4979 SDLoc dl(Op);
4980 // We use 0 and 1 as false and true values.
4981 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
4982 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
4983 EVT VT = Op.getValueType();
4984
4985 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
4986 ARMcc, CCR, OverflowCmp);
4987
4988 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
4989 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4990}
4991
4993 SelectionDAG &DAG) {
4994 SDLoc DL(BoolCarry);
4995 EVT CarryVT = BoolCarry.getValueType();
4996
4997 // This converts the boolean value carry into the carry flag by doing
4998 // ARMISD::SUBC Carry, 1
4999 SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
5000 DAG.getVTList(CarryVT, MVT::i32),
5001 BoolCarry, DAG.getConstant(1, DL, CarryVT));
5002 return Carry.getValue(1);
5003}
5004
5006 SelectionDAG &DAG) {
5007 SDLoc DL(Flags);
5008
5009 // Now convert the carry flag into a boolean carry. We do this
5010 // using ARMISD::ADDE 0, 0, Carry
5011 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
5012 DAG.getConstant(0, DL, MVT::i32),
5013 DAG.getConstant(0, DL, MVT::i32), Flags);
5014}
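// Editorial worked example (not in the original source): with ARM's
// subtract-with-borrow convention, a boolean carry of 1 run through
// "SUBC 1, 1" computes 1 - 1 with no borrow, so the C flag ends up set; a
// boolean 0 computes 0 - 1, which borrows and leaves C clear. Conversely,
// "ADDE 0, 0, C" yields exactly the C flag as an i32 value of 0 or 1.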
5015
5016SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
5017 SelectionDAG &DAG) const {
5018 // Let legalize expand this if it isn't a legal type yet.
5019 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
5020 return SDValue();
5021
5022 SDValue LHS = Op.getOperand(0);
5023 SDValue RHS = Op.getOperand(1);
5024 SDLoc dl(Op);
5025
5026 EVT VT = Op.getValueType();
5027 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
5028 SDValue Value;
5029 SDValue Overflow;
5030 switch (Op.getOpcode()) {
5031 default:
5032 llvm_unreachable("Unknown overflow instruction!");
5033 case ISD::UADDO:
5034 Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
5035 // Convert the carry flag into a boolean value.
5036 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5037 break;
5038 case ISD::USUBO: {
5039 Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
5040 // Convert the carry flag into a boolean value.
5041 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5042 // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
5043 // value by computing 1 - C.
5044 Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
5045 DAG.getConstant(1, dl, MVT::i32), Overflow);
5046 break;
5047 }
5048 }
5049
5050 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
5051}
5052
5054 const ARMSubtarget *Subtarget) {
5055 EVT VT = Op.getValueType();
5056 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->isThumb1Only())
5057 return SDValue();
5058 if (!VT.isSimple())
5059 return SDValue();
5060
5061 unsigned NewOpcode;
5062 switch (VT.getSimpleVT().SimpleTy) {
5063 default:
5064 return SDValue();
5065 case MVT::i8:
5066 switch (Op->getOpcode()) {
5067 case ISD::UADDSAT:
5068 NewOpcode = ARMISD::UQADD8b;
5069 break;
5070 case ISD::SADDSAT:
5071 NewOpcode = ARMISD::QADD8b;
5072 break;
5073 case ISD::USUBSAT:
5074 NewOpcode = ARMISD::UQSUB8b;
5075 break;
5076 case ISD::SSUBSAT:
5077 NewOpcode = ARMISD::QSUB8b;
5078 break;
5079 }
5080 break;
5081 case MVT::i16:
5082 switch (Op->getOpcode()) {
5083 case ISD::UADDSAT:
5084 NewOpcode = ARMISD::UQADD16b;
5085 break;
5086 case ISD::SADDSAT:
5087 NewOpcode = ARMISD::QADD16b;
5088 break;
5089 case ISD::USUBSAT:
5090 NewOpcode = ARMISD::UQSUB16b;
5091 break;
5092 case ISD::SSUBSAT:
5093 NewOpcode = ARMISD::QSUB16b;
5094 break;
5095 }
5096 break;
5097 }
5098
5099 SDLoc dl(Op);
5100 SDValue Add =
5101 DAG.getNode(NewOpcode, dl, MVT::i32,
5102 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
5103 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
5104 return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
5105}
5106
5107SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
5108 SDValue Cond = Op.getOperand(0);
5109 SDValue SelectTrue = Op.getOperand(1);
5110 SDValue SelectFalse = Op.getOperand(2);
5111 SDLoc dl(Op);
5112 unsigned Opc = Cond.getOpcode();
5113
5114 if (Cond.getResNo() == 1 &&
5115 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5116 Opc == ISD::USUBO)) {
5117 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5118 return SDValue();
5119
5120 SDValue Value, OverflowCmp;
5121 SDValue ARMcc;
5122 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5123 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5124 EVT VT = Op.getValueType();
5125
5126 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
5127 OverflowCmp, DAG);
5128 }
5129
5130 // Convert:
5131 //
5132 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
5133 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
5134 //
5135 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
5136 const ConstantSDNode *CMOVTrue =
5137 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
5138 const ConstantSDNode *CMOVFalse =
5139 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
5140
5141 if (CMOVTrue && CMOVFalse) {
5142 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
5143 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
5144
5145 SDValue True;
5146 SDValue False;
5147 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
5148 True = SelectTrue;
5149 False = SelectFalse;
5150 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
5151 True = SelectFalse;
5152 False = SelectTrue;
5153 }
5154
5155 if (True.getNode() && False.getNode()) {
5156 EVT VT = Op.getValueType();
5157 SDValue ARMcc = Cond.getOperand(2);
5158 SDValue CCR = Cond.getOperand(3);
5159 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
5160 assert(True.getValueType() == VT);
5161 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
5162 }
5163 }
5164 }
5165
5166 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
5167 // undefined bits before doing a full-word comparison with zero.
5168 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
5169 DAG.getConstant(1, dl, Cond.getValueType()));
5170
5171 return DAG.getSelectCC(dl, Cond,
5172 DAG.getConstant(0, dl, Cond.getValueType()),
5173 SelectTrue, SelectFalse, ISD::SETNE);
5174}
5175
5177 bool &swpCmpOps, bool &swpVselOps) {
5178 // Start by selecting the GE condition code for opcodes that return true for
5179 // 'equality'
5180 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
5181 CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
5182 CondCode = ARMCC::GE;
5183
5184 // and GT for opcodes that return false for 'equality'.
5185 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
5186 CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
5187 CondCode = ARMCC::GT;
5188
5189 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
5190 // to swap the compare operands.
5191 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
5192 CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
5193 swpCmpOps = true;
5194
5195 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
5196 // If we have an unordered opcode, we need to swap the operands to the VSEL
5197 // instruction (effectively negating the condition).
5198 //
5199 // This also has the effect of swapping which one of 'less' or 'greater'
5200 // returns true, so we also swap the compare operands. It also switches
5201 // whether we return true for 'equality', so we compensate by picking the
5202 // opposite condition code to our original choice.
5203 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
5204 CC == ISD::SETUGT) {
5205 swpCmpOps = !swpCmpOps;
5206 swpVselOps = !swpVselOps;
5207 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
5208 }
5209
5210 // 'ordered' is 'anything but unordered', so use the VS condition code and
5211 // swap the VSEL operands.
5212 if (CC == ISD::SETO) {
5213 CondCode = ARMCC::VS;
5214 swpVselOps = true;
5215 }
5216
5217 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
5218 // code and swap the VSEL operands. Also do this if we don't care about the
5219 // unordered case.
5220 if (CC == ISD::SETUNE || CC == ISD::SETNE) {
5221 CondCode = ARMCC::EQ;
5222 swpVselOps = true;
5223 }
5224}
5225
5226SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
5227 SDValue TrueVal, SDValue ARMcc, SDValue CCR,
5228 SDValue Cmp, SelectionDAG &DAG) const {
5229 if (!Subtarget->hasFP64() && VT == MVT::f64) {
5231 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
5233 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
5234
5235 SDValue TrueLow = TrueVal.getValue(0);
5236 SDValue TrueHigh = TrueVal.getValue(1);
5237 SDValue FalseLow = FalseVal.getValue(0);
5238 SDValue FalseHigh = FalseVal.getValue(1);
5239
5240 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
5241 ARMcc, CCR, Cmp);
5242 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
5243 ARMcc, CCR, duplicateCmp(Cmp, DAG));
5244
5245 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
5246 } else {
5247 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
5248 Cmp);
5249 }
5250}
5251
5253 return CC == ISD::SETGT || CC == ISD::SETGE;
5254}
5255
5257 return CC == ISD::SETLT || CC == ISD::SETLE;
5258}
5259
5260// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
5261// All of these conditions (and their <= and >= counterparts) will do:
5262// x < k ? k : x
5263// x > k ? x : k
5264// k < x ? x : k
5265// k > x ? k : x
5266static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
5267 const SDValue TrueVal, const SDValue FalseVal,
5268 const ISD::CondCode CC, const SDValue K) {
5269 return (isGTorGE(CC) &&
5270 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
5271 (isLTorLE(CC) &&
5272 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
5273}
5274
5275// Check if two chained conditionals could be converted into SSAT or USAT.
5276//
5277 // SSAT can replace a set of two conditional selects that bound a number to an
5278 // interval of type [~k, k] when k + 1 is a power of 2. Here are some examples:
5279//
5280// x < -k ? -k : (x > k ? k : x)
5281// x < -k ? -k : (x < k ? x : k)
5282// x > -k ? (x > k ? k : x) : -k
5283// x < k ? (x < -k ? -k : x) : k
5284// etc.
5285//
5286// LLVM canonicalizes these to either a min(max()) or a max(min())
5287// pattern. This function tries to match one of these and will return a SSAT
5288// node if successful.
5289//
5290 // USAT works similarly to SSAT but bounds to the interval [0, k], where k + 1
5291// is a power of 2.
5293 EVT VT = Op.getValueType();
5294 SDValue V1 = Op.getOperand(0);
5295 SDValue K1 = Op.getOperand(1);
5296 SDValue TrueVal1 = Op.getOperand(2);
5297 SDValue FalseVal1 = Op.getOperand(3);
5298 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5299
5300 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
5301 if (Op2.getOpcode() != ISD::SELECT_CC)
5302 return SDValue();
5303
5304 SDValue V2 = Op2.getOperand(0);
5305 SDValue K2 = Op2.getOperand(1);
5306 SDValue TrueVal2 = Op2.getOperand(2);
5307 SDValue FalseVal2 = Op2.getOperand(3);
5308 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5309
5310 SDValue V1Tmp = V1;
5311 SDValue V2Tmp = V2;
5312
5313 // Check that the registers and the constants match a max(min()) or min(max())
5314 // pattern
5315 if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5316 K2 != FalseVal2 ||
5317 !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
5318 return SDValue();
5319
5320 // Check that the constant in the lower-bound check is
5321 // the opposite of the constant in the upper-bound check
5322 // in 1's complement.
5323 if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
5324 return SDValue();
5325
5326 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5327 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5328 int64_t PosVal = std::max(Val1, Val2);
5329 int64_t NegVal = std::min(Val1, Val2);
5330
5331 if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
5332 !isPowerOf2_64(PosVal + 1))
5333 return SDValue();
5334
5335 // Handle the difference between USAT (unsigned) and SSAT (signed)
5336 // saturation
5337 // At this point, PosVal is guaranteed to be positive
5338 uint64_t K = PosVal;
5339 SDLoc dl(Op);
5340 if (Val1 == ~Val2)
5341 return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
5342 DAG.getConstant(llvm::countr_one(K), dl, VT));
5343 if (NegVal == 0)
5344 return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
5345 DAG.getConstant(llvm::countr_one(K), dl, VT));
5346
5347 return SDValue();
5348}
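// Editorial worked example (not in the original source): clamping x to
// [-128, 127] gives Val1 = -128 and Val2 = 127, so PosVal = 127, ~Val2 == Val1
// and PosVal + 1 == 128 is a power of two, producing an SSAT node. Clamping to
// [0, 255] instead has NegVal == 0 and PosVal == 255, producing USAT.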
5349
5350// Check if a condition of the type x < k ? k : x can be converted into a
5351// bit operation instead of conditional moves.
5352// Currently this is allowed given:
5353// - The conditions and values match up
5354// - k is 0 or -1 (all ones)
5355 // This function will not check the last condition; that's up to the caller.
5356 // It returns true if the transformation can be made, and in that case
5357 // returns x in V, and k in SatK.
5358 static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
5359 SDValue &SatK)
5360{
5361 SDValue LHS = Op.getOperand(0);
5362 SDValue RHS = Op.getOperand(1);
5363 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5364 SDValue TrueVal = Op.getOperand(2);
5365 SDValue FalseVal = Op.getOperand(3);
5366
5367 SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
5368 ? &RHS
5369 : nullptr;
5370
5371 // No constant operation in comparison, early out
5372 if (!K)
5373 return false;
5374
5375 SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
5376 V = (KTmp == TrueVal) ? FalseVal : TrueVal;
5377 SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
5378
5379 // If the constant on left and right side, or variable on left and right,
5380 // does not match, early out
5381 if (*K != KTmp || V != VTmp)
5382 return false;
5383
5384 if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
5385 SatK = *K;
5386 return true;
5387 }
5388
5389 return false;
5390}
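// Illustrative sketch (not part of the upstream code): the scalar identities
// behind the bit-operation form that LowerSELECT_CC uses for the lower-saturate
// patterns detected above. Assuming an arithmetic shift of negative values,
// x >> 31 is 0 when x >= 0 and all ones when x < 0, so:
//   x < 0  ? 0  : x  ==  x & ~(x >> 31)
//   x < -1 ? -1 : x  ==  x | (x >> 31)
static inline int lowerSatAtZero(int x) { return x & ~(x >> 31); }
static inline int lowerSatAtMinusOne(int x) { return x | (x >> 31); }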
5391
5392bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
5393 if (VT == MVT::f32)
5394 return !Subtarget->hasVFP2Base();
5395 if (VT == MVT::f64)
5396 return !Subtarget->hasFP64();
5397 if (VT == MVT::f16)
5398 return !Subtarget->hasFullFP16();
5399 return false;
5400}
5401
5402SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5403 EVT VT = Op.getValueType();
5404 SDLoc dl(Op);
5405
5406 // Try to convert two saturating conditional selects into a single SSAT
5407 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5408 if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
5409 return SatValue;
5410
5411 // Try to convert expressions of the form x < k ? k : x (and similar forms)
5412 // into more efficient bit operations, which is possible when k is 0 or -1.
5413 // On ARM and Thumb-2, which have a flexible second operand, this will result
5414 // in a single instruction. On Thumb the shift and the bit operation will be two
5415 // instructions.
5416 // Only allow this transformation on full-width (32-bit) operations
5417 SDValue LowerSatConstant;
5418 SDValue SatValue;
5419 if (VT == MVT::i32 &&
5420 isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
5421 SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
5422 DAG.getConstant(31, dl, VT));
5423 if (isNullConstant(LowerSatConstant)) {
5424 SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
5425 DAG.getAllOnesConstant(dl, VT));
5426 return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
5427 } else if (isAllOnesConstant(LowerSatConstant))
5428 return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
5429 }
5430
5431 SDValue LHS = Op.getOperand(0);
5432 SDValue RHS = Op.getOperand(1);
5433 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5434 SDValue TrueVal = Op.getOperand(2);
5435 SDValue FalseVal = Op.getOperand(3);
5436 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
5437 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
5438
5439 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5440 LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
5441 unsigned TVal = CTVal->getZExtValue();
5442 unsigned FVal = CFVal->getZExtValue();
5443 unsigned Opcode = 0;
5444
5445 if (TVal == ~FVal) {
5446 Opcode = ARMISD::CSINV;
5447 } else if (TVal == ~FVal + 1) {
5448 Opcode = ARMISD::CSNEG;
5449 } else if (TVal + 1 == FVal) {
5450 Opcode = ARMISD::CSINC;
5451 } else if (TVal == FVal + 1) {
5452 Opcode = ARMISD::CSINC;
5453 std::swap(TrueVal, FalseVal);
5454 std::swap(TVal, FVal);
5455 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5456 }
5457
5458 if (Opcode) {
5459 // If one of the constants is cheaper than another, materialise the
5460 // cheaper one and let the csel generate the other.
5461 if (Opcode != ARMISD::CSINC &&
5462 HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
5463 std::swap(TrueVal, FalseVal);
5464 std::swap(TVal, FVal);
5465 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5466 }
5467
5468 // Attempt to use ZR, checking whether TVal is 0, possibly inverting the
5469 // condition to get there. CSINC is not invertible like the other two
5470 // (~(~a) == a and -(-a) == a, but (a+1)+1 != a).
5471 if (FVal == 0 && Opcode != ARMISD::CSINC) {
5472 std::swap(TrueVal, FalseVal);
5473 std::swap(TVal, FVal);
5474 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5475 }
5476
5477 // Drops F's value because we can get it by inverting/negating TVal.
5478 FalseVal = TrueVal;
5479
5480 SDValue ARMcc;
5481 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5482 EVT VT = TrueVal.getValueType();
5483 return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
5484 }
5485 }
5486
5487 if (isUnsupportedFloatingType(LHS.getValueType())) {
5488 DAG.getTargetLoweringInfo().softenSetCCOperands(
5489 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5490
5491 // If softenSetCCOperands only returned one value, we should compare it to
5492 // zero.
5493 if (!RHS.getNode()) {
5494 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5495 CC = ISD::SETNE;
5496 }
5497 }
5498
5499 if (LHS.getValueType() == MVT::i32) {
5500 // Try to generate VSEL on ARMv8.
5501 // The VSEL instruction can't use all the usual ARM condition
5502 // codes: it only has two bits to select the condition code, so it's
5503 // constrained to use only GE, GT, VS and EQ.
5504 //
5505 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
5506 // swap the operands of the previous compare instruction (effectively
5507 // inverting the compare condition, swapping 'less' and 'greater') and
5508 // sometimes need to swap the operands to the VSEL (which inverts the
5509 // condition in the sense of firing whenever the previous condition didn't)
5510 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5511 TrueVal.getValueType() == MVT::f32 ||
5512 TrueVal.getValueType() == MVT::f64)) {
5513 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5514 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
5515 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
5516 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5517 std::swap(TrueVal, FalseVal);
5518 }
5519 }
5520
5521 SDValue ARMcc;
5522 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5523 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5524 // Choose GE over PL, which vsel does not support
5525 if (ARMcc->getAsZExtVal() == ARMCC::PL)
5526 ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
5527 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5528 }
5529
5530 ARMCC::CondCodes CondCode, CondCode2;
5531 FPCCToARMCC(CC, CondCode, CondCode2);
5532
5533 // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
5534 // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
5535 // must use VSEL (limited condition codes), due to not having conditional f16
5536 // moves.
5537 if (Subtarget->hasFPARMv8Base() &&
5538 !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
5539 (TrueVal.getValueType() == MVT::f16 ||
5540 TrueVal.getValueType() == MVT::f32 ||
5541 TrueVal.getValueType() == MVT::f64)) {
5542 bool swpCmpOps = false;
5543 bool swpVselOps = false;
5544 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
5545
5546 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
5547 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
5548 if (swpCmpOps)
5549 std::swap(LHS, RHS);
5550 if (swpVselOps)
5551 std::swap(TrueVal, FalseVal);
5552 }
5553 }
5554
5555 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5556 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5557 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5558 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5559 if (CondCode2 != ARMCC::AL) {
5560 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
5561 // FIXME: Needs another CMP because flag can have but one use.
5562 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
5563 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
5564 }
5565 return Result;
5566}
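// Illustrative sketch (not part of the upstream code): the constant
// relationships the v8.1-M path above matches for select(c, TVal, FVal). Only
// one of the two constants needs to be materialised; the conditional
// instruction recovers the other (shown here for the non-swapped cases).
static inline unsigned selWithInv(bool c, unsigned TVal) { return c ? TVal : ~TVal; }     // FVal == ~TVal
static inline unsigned selWithNeg(bool c, unsigned TVal) { return c ? TVal : 0u - TVal; } // FVal == -TVal
static inline unsigned selWithInc(bool c, unsigned TVal) { return c ? TVal : TVal + 1; }  // FVal == TVal + 1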
5567
5568/// canChangeToInt - Given the fp compare operand, return true if it is suitable
5569/// to morph to an integer compare sequence.
5570static bool canChangeToInt(SDValue Op, bool &SeenZero,
5571 const ARMSubtarget *Subtarget) {
5572 SDNode *N = Op.getNode();
5573 if (!N->hasOneUse())
5574 // Otherwise it requires moving the value from fp to integer registers.
5575 return false;
5576 if (!N->getNumValues())
5577 return false;
5578 EVT VT = Op.getValueType();
5579 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5580 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
5581 // vmrs are very slow, e.g. cortex-a8.
5582 return false;
5583
5584 if (isFloatingPointZero(Op)) {
5585 SeenZero = true;
5586 return true;
5587 }
5588 return ISD::isNormalLoad(N);
5589}
5590
5593 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
5594
5595 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
5596 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5597 Ld->getPointerInfo(), Ld->getAlign(),
5598 Ld->getMemOperand()->getFlags());
5599
5600 llvm_unreachable("Unknown VFP cmp argument!");
5601}
5602
5604 SDValue &RetVal1, SDValue &RetVal2) {
5605 SDLoc dl(Op);
5606
5607 if (isFloatingPointZero(Op)) {
5608 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
5609 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
5610 return;
5611 }
5612
5613 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
5614 SDValue Ptr = Ld->getBasePtr();
5615 RetVal1 =
5616 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5617 Ld->getAlign(), Ld->getMemOperand()->getFlags());
5618
5619 EVT PtrType = Ptr.getValueType();
5620 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
5621 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
5622 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5623 Ld->getPointerInfo().getWithOffset(4),
5624 commonAlignment(Ld->getAlign(), 4),
5625 Ld->getMemOperand()->getFlags());
5626 return;
5627 }
5628
5629 llvm_unreachable("Unknown VFP cmp argument!");
5630}
5631
5632/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
5633/// f32 and even f64 comparisons to integer ones.
5634SDValue
5635ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
5636 SDValue Chain = Op.getOperand(0);
5637 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5638 SDValue LHS = Op.getOperand(2);
5639 SDValue RHS = Op.getOperand(3);
5640 SDValue Dest = Op.getOperand(4);
5641 SDLoc dl(Op);
5642
5643 bool LHSSeenZero = false;
5644 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
5645 bool RHSSeenZero = false;
5646 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
5647 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5648 // If unsafe fp math optimization is enabled and there are no other uses of
5649 // the CMP operands, and the condition code is EQ or NE, we can optimize it
5650 // to an integer comparison.
5651 if (CC == ISD::SETOEQ)
5652 CC = ISD::SETEQ;
5653 else if (CC == ISD::SETUNE)
5654 CC = ISD::SETNE;
5655
5656 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5657 SDValue ARMcc;
5658 if (LHS.getValueType() == MVT::f32) {
5659 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5660 bitcastf32Toi32(LHS, DAG), Mask);
5661 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5662 bitcastf32Toi32(RHS, DAG), Mask);
5663 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5664 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5665 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5666 Chain, Dest, ARMcc, CCR, Cmp);
5667 }
5668
5669 SDValue LHS1, LHS2;
5670 SDValue RHS1, RHS2;
5671 expandf64Toi32(LHS, DAG, LHS1, LHS2);
5672 expandf64Toi32(RHS, DAG, RHS1, RHS2);
5673 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
5674 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
5675 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5676 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5677 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5678 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5679 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
5680 }
5681
5682 return SDValue();
5683}
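// Illustrative sketch (not part of the upstream code): why masking off the
// sign bit makes an integer equality test valid here. When one side is known
// to be +/-0.0 and the condition is EQ/NE (with unsafe-fp-math waiving the NaN
// ordering concerns), x == 0.0 holds exactly when the non-sign bits of x are
// all zero.
static inline bool isFPZeroByBits(uint32_t F32Bits) {
  return (F32Bits & 0x7fffffffu) == 0; // matches both +0.0 and -0.0
}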
5684
5685SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
5686 SDValue Chain = Op.getOperand(0);
5687 SDValue Cond = Op.getOperand(1);
5688 SDValue Dest = Op.getOperand(2);
5689 SDLoc dl(Op);
5690
5691 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5692 // instruction.
5693 unsigned Opc = Cond.getOpcode();
5694 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5695 !Subtarget->isThumb1Only();
5696 if (Cond.getResNo() == 1 &&
5697 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5698 Opc == ISD::USUBO || OptimizeMul)) {
5699 // Only lower legal XALUO ops.
5700 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5701 return SDValue();
5702
5703 // The actual operation with overflow check.
5704 SDValue Value, OverflowCmp;
5705 SDValue ARMcc;
5706 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5707
5708 // Reverse the condition code.
5709 ARMCC::CondCodes CondCode =
5710 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5711 CondCode = ARMCC::getOppositeCondition(CondCode);
5712 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5713 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5714
5715 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5716 OverflowCmp);
5717 }
5718
5719 return SDValue();
5720}
5721
5722SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5723 SDValue Chain = Op.getOperand(0);
5724 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5725 SDValue LHS = Op.getOperand(2);
5726 SDValue RHS = Op.getOperand(3);
5727 SDValue Dest = Op.getOperand(4);
5728 SDLoc dl(Op);
5729
5730 if (isUnsupportedFloatingType(LHS.getValueType())) {
5732 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5733
5734 // If softenSetCCOperands only returned one value, we should compare it to
5735 // zero.
5736 if (!RHS.getNode()) {
5737 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5738 CC = ISD::SETNE;
5739 }
5740 }
5741
5742 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5743 // instruction.
5744 unsigned Opc = LHS.getOpcode();
5745 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5746 !Subtarget->isThumb1Only();
5747 if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
5748 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5749 Opc == ISD::USUBO || OptimizeMul) &&
5750 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5751 // Only lower legal XALUO ops.
5752 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5753 return SDValue();
5754
5755 // The actual operation with overflow check.
5756 SDValue Value, OverflowCmp;
5757 SDValue ARMcc;
5758 std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
5759
5760 if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
5761 // Reverse the condition code.
5762 ARMCC::CondCodes CondCode =
5763 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5764 CondCode = ARMCC::getOppositeCondition(CondCode);
5765 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5766 }
5767 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5768
5769 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5770 OverflowCmp);
5771 }
5772
5773 if (LHS.getValueType() == MVT::i32) {
5774 SDValue ARMcc;
5775 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5776 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5777 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5778 Chain, Dest, ARMcc, CCR, Cmp);
5779 }
5780
5781 if (getTargetMachine().Options.UnsafeFPMath &&
5782 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
5783 CC == ISD::SETNE || CC == ISD::SETUNE)) {
5784 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
5785 return Result;
5786 }
5787
5788 ARMCC::CondCodes CondCode, CondCode2;
5789 FPCCToARMCC(CC, CondCode, CondCode2);
5790
5791 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5792 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5793 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5794 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5795 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
5796 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5797 if (CondCode2 != ARMCC::AL) {
5798 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
5799 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
5800 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5801 }
5802 return Res;
5803}
5804
5805SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
5806 SDValue Chain = Op.getOperand(0);
5807 SDValue Table = Op.getOperand(1);
5808 SDValue Index = Op.getOperand(2);
5809 SDLoc dl(Op);
5810
5811 EVT PTy = getPointerTy(DAG.getDataLayout());
5812 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
5813 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5814 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
5815 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
5816 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
5817 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5818 // Thumb2 and ARMv8-M use a two-level jump: the branch jumps into the jump
5819 // table, which then does another jump to the destination. This also makes it
5820 // easier to translate it to TBB / TBH later (Thumb2 only).
5821 // FIXME: This might not work if the function is extremely large.
5822 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
5823 Addr, Op.getOperand(2), JTI);
5824 }
5825 if (isPositionIndependent() || Subtarget->isROPI()) {
5826 Addr =
5827 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
5828 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5829 Chain = Addr.getValue(1);
5830 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
5831 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5832 } else {
5833 Addr =
5834 DAG.getLoad(PTy, dl, Chain, Addr,
5835 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5836 Chain = Addr.getValue(1);
5837 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5838 }
5839}
5840
5842 EVT VT = Op.getValueType();
5843 SDLoc dl(Op);
5844
5845 if (Op.getValueType().getVectorElementType() == MVT::i32) {
5846 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5847 return Op;
5848 return DAG.UnrollVectorOp(Op.getNode());
5849 }
5850
5851 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5852
5853 EVT NewTy;
5854 const EVT OpTy = Op.getOperand(0).getValueType();
5855 if (OpTy == MVT::v4f32)
5856 NewTy = MVT::v4i32;
5857 else if (OpTy == MVT::v4f16 && HasFullFP16)
5858 NewTy = MVT::v4i16;
5859 else if (OpTy == MVT::v8f16 && HasFullFP16)
5860 NewTy = MVT::v8i16;
5861 else
5862 llvm_unreachable("Invalid type for custom lowering!");
5863
5864 if (VT != MVT::v4i16 && VT != MVT::v8i16)
5865 return DAG.UnrollVectorOp(Op.getNode());
5866
5867 Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5868 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5869}
5870
5871SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5872 EVT VT = Op.getValueType();
5873 if (VT.isVector())
5874 return LowerVectorFP_TO_INT(Op, DAG);
5875
5876 bool IsStrict = Op->isStrictFPOpcode();
5877 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5878
5879 if (isUnsupportedFloatingType(SrcVal.getValueType())) {
5880 RTLIB::Libcall LC;
5881 if (Op.getOpcode() == ISD::FP_TO_SINT ||
5882 Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
5883 LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
5884 Op.getValueType());
5885 else
5886 LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
5887 Op.getValueType());
5888 SDLoc Loc(Op);
5889 MakeLibCallOptions CallOptions;
5890 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
5891 SDValue Result;
5892 std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
5893 CallOptions, Loc, Chain);
5894 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
5895 }
5896
5897 // FIXME: Remove this when we have strict fp instruction selection patterns
5898 if (IsStrict) {
5899 SDLoc Loc(Op);
5900 SDValue Result =
5901 DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
5902 : ISD::FP_TO_UINT,
5903 Loc, Op.getValueType(), SrcVal);
5904 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
5905 }
5906
5907 return Op;
5908}
5909 
5910 static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
5911 const ARMSubtarget *Subtarget) {
5912 EVT VT = Op.getValueType();
5913 EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5914 EVT FromVT = Op.getOperand(0).getValueType();
5915
5916 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
5917 return Op;
5918 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
5919 Subtarget->hasFP64())
5920 return Op;
5921 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
5922 Subtarget->hasFullFP16())
5923 return Op;
5924 if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
5925 Subtarget->hasMVEFloatOps())
5926 return Op;
5927 if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
5928 Subtarget->hasMVEFloatOps())
5929 return Op;
5930
5931 if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)
5932 return SDValue();
5933
5934 SDLoc DL(Op);
5935 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
5936 unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
5937 SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
5938 DAG.getValueType(VT.getScalarType()));
5939 SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT,
5940 DAG.getConstant((1 << BW) - 1, DL, VT));
5941 if (IsSigned)
5942 Max = DAG.getNode(ISD::SMAX, DL, VT, Max,
5943 DAG.getConstant(-(1 << BW), DL, VT));
5944 return Max;
5945}
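// Illustrative sketch (not part of the upstream code): the scalar clamp that
// the SMIN/SMAX (or UMIN) nodes above implement once the wide conversion has
// been done. BW here excludes the sign bit in the signed case, e.g. BW == 7
// clamps to the i8 range [-128, 127].
static inline int32_t clampSignedSat(int32_t Wide, unsigned BW) {
  int32_t Hi = (1 << BW) - 1; // e.g. BW == 7 -> 127
  int32_t Lo = -(1 << BW);    // e.g. BW == 7 -> -128
  return std::min(std::max(Wide, Lo), Hi);
}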
5946 
5947 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
5948 EVT VT = Op.getValueType();
5949 SDLoc dl(Op);
5950
5951 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
5952 if (VT.getVectorElementType() == MVT::f32)
5953 return Op;
5954 return DAG.UnrollVectorOp(Op.getNode());
5955 }
5956
5957 assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
5958 Op.getOperand(0).getValueType() == MVT::v8i16) &&
5959 "Invalid type for custom lowering!");
5960
5961 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5962
5963 EVT DestVecType;
5964 if (VT == MVT::v4f32)
5965 DestVecType = MVT::v4i32;
5966 else if (VT == MVT::v4f16 && HasFullFP16)
5967 DestVecType = MVT::v4i16;
5968 else if (VT == MVT::v8f16 && HasFullFP16)
5969 DestVecType = MVT::v8i16;
5970 else
5971 return DAG.UnrollVectorOp(Op.getNode());
5972
5973 unsigned CastOpc;
5974 unsigned Opc;
5975 switch (Op.getOpcode()) {
5976 default: llvm_unreachable("Invalid opcode!");
5977 case ISD::SINT_TO_FP:
5978 CastOpc = ISD::SIGN_EXTEND;
5979 Opc = ISD::SINT_TO_FP;
5980 break;
5981 case ISD::UINT_TO_FP:
5982 CastOpc = ISD::ZERO_EXTEND;
5983 Opc = ISD::UINT_TO_FP;
5984 break;
5985 }
5986
5987 Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
5988 return DAG.getNode(Opc, dl, VT, Op);
5989}
5990
5991SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
5992 EVT VT = Op.getValueType();
5993 if (VT.isVector())
5994 return LowerVectorINT_TO_FP(Op, DAG);
5995 if (isUnsupportedFloatingType(VT)) {
5996 RTLIB::Libcall LC;
5997 if (Op.getOpcode() == ISD::SINT_TO_FP)
5998 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
5999 Op.getValueType());
6000 else
6001 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
6002 Op.getValueType());
6003 MakeLibCallOptions CallOptions;
6004 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
6005 CallOptions, SDLoc(Op)).first;
6006 }
6007
6008 return Op;
6009}
6010
6011SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
6012 // Implement fcopysign with a fabs and a conditional fneg.
6013 SDValue Tmp0 = Op.getOperand(0);
6014 SDValue Tmp1 = Op.getOperand(1);
6015 SDLoc dl(Op);
6016 EVT VT = Op.getValueType();
6017 EVT SrcVT = Tmp1.getValueType();
6018 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
6019 Tmp0.getOpcode() == ARMISD::VMOVDRR;
6020 bool UseNEON = !InGPR && Subtarget->hasNEON();
6021
6022 if (UseNEON) {
6023 // Use VBSL to copy the sign bit.
6024 unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
6025 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
6026 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
6027 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
6028 if (VT == MVT::f64)
6029 Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6030 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
6031 DAG.getConstant(32, dl, MVT::i32));
6032 else /*if (VT == MVT::f32)*/
6033 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
6034 if (SrcVT == MVT::f32) {
6035 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
6036 if (VT == MVT::f64)
6037 Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6038 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
6039 DAG.getConstant(32, dl, MVT::i32));
6040 } else if (VT == MVT::f32)
6041 Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
6042 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
6043 DAG.getConstant(32, dl, MVT::i32));
6044 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
6045 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
6046 
6047 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff),
6048 dl, MVT::i32);
6049 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
6050 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
6051 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
6052
6053 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
6054 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
6055 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
6056 if (VT == MVT::f32) {
6057 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
6058 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
6059 DAG.getConstant(0, dl, MVT::i32));
6060 } else {
6061 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
6062 }
6063
6064 return Res;
6065 }
6066
6067 // Bitcast operand 1 to i32.
6068 if (SrcVT == MVT::f64)
6069 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6070 Tmp1).getValue(1);
6071 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
6072
6073 // Or in the signbit with integer operations.
6074 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
6075 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
6076 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
6077 if (VT == MVT::f32) {
6078 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
6079 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
6080 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
6081 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
6082 }
6083
6084 // f64: Or the high part with signbit and then combine two parts.
6085 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6086 Tmp0);
6087 SDValue Lo = Tmp0.getValue(0);
6088 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
6089 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
6090 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
6091}
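// Illustrative sketch (not part of the upstream code): the integer form of the
// f32 path above. fcopysign keeps the magnitude bits of the first operand and
// takes only the sign bit of the second.
static inline uint32_t copySignF32Bits(uint32_t MagBits, uint32_t SignBits) {
  return (MagBits & 0x7fffffffu) | (SignBits & 0x80000000u);
}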
6092
6093 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
6094 MachineFunction &MF = DAG.getMachineFunction();
6095 MachineFrameInfo &MFI = MF.getFrameInfo();
6096 MFI.setReturnAddressIsTaken(true);
6097 
6098 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
6099 return SDValue();
6100
6101 EVT VT = Op.getValueType();
6102 SDLoc dl(Op);
6103 unsigned Depth = Op.getConstantOperandVal(0);
6104 if (Depth) {
6105 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
6106 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
6107 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
6108 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
6109 MachinePointerInfo());
6110 }
6111
6112 // Return LR, which contains the return address. Mark it an implicit live-in.
6113 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
6114 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
6115}
6116
6117SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
6118 const ARMBaseRegisterInfo &ARI =
6119 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
6120 MachineFunction &MF = DAG.getMachineFunction();
6121 MachineFrameInfo &MFI = MF.getFrameInfo();
6122 MFI.setFrameAddressIsTaken(true);
6123
6124 EVT VT = Op.getValueType();
6125 SDLoc dl(Op); // FIXME probably not meaningful
6126 unsigned Depth = Op.getConstantOperandVal(0);
6127 Register FrameReg = ARI.getFrameRegister(MF);
6128 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
6129 while (Depth--)
6130 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
6131 MachinePointerInfo());
6132 return FrameAddr;
6133}
6134
6135// FIXME? Maybe this could be a TableGen attribute on some registers and
6136// this table could be generated automatically from RegInfo.
6137Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
6138 const MachineFunction &MF) const {
6139 Register Reg = StringSwitch<unsigned>(RegName)
6140 .Case("sp", ARM::SP)
6141 .Default(0);
6142 if (Reg)
6143 return Reg;
6144 report_fatal_error(Twine("Invalid register name \""
6145 + StringRef(RegName) + "\"."));
6146}
6147
6148// Result is 64 bit value so split into two 32 bit values and return as a
6149 // pair of values.
6150 static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
6151 SelectionDAG &DAG) {
6152 SDLoc DL(N);
6153
6154 // This function is only supposed to be called for i64 type destination.
6155 assert(N->getValueType(0) == MVT::i64
6156 && "ExpandREAD_REGISTER called for non-i64 type result.");
6157 
6158 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
6159 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
6160 N->getOperand(0),
6161 N->getOperand(1));
6162
6163 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
6164 Read.getValue(1)));
6165 Results.push_back(Read.getOperand(0));
6166}
6167
6168/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
6169/// When \p DstVT, the destination type of \p BC, is on the vector
6170/// register bank and the source of bitcast, \p Op, operates on the same bank,
6171/// it might be possible to combine them, such that everything stays on the
6172/// vector register bank.
6173 /// \return The node that would replace \p BC, if the combine
6174 /// is possible.
6175 static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
6176 SelectionDAG &DAG) {
6177 SDValue Op = BC->getOperand(0);
6178 EVT DstVT = BC->getValueType(0);
6179
6180 // The only vector instruction that can produce a scalar (remember,
6181 // since the bitcast was about to be turned into VMOVDRR, the source
6182 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
6183 // Moreover, we can do this combine only if there is one use.
6184 // Finally, if the destination type is not a vector, there is not
6185 // much point on forcing everything on the vector bank.
6186 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6187 !Op.hasOneUse())
6188 return SDValue();
6189
6190 // If the index is not constant, we will introduce an additional
6191 // multiply that will stick.
6192 // Give up in that case.
6193 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6194 if (!Index)
6195 return SDValue();
6196 unsigned DstNumElt = DstVT.getVectorNumElements();
6197
6198 // Compute the new index.
6199 const APInt &APIntIndex = Index->getAPIntValue();
6200 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
6201 NewIndex *= APIntIndex;
6202 // Check if the new constant index fits into i32.
6203 if (NewIndex.getBitWidth() > 32)
6204 return SDValue();
6205
6206 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
6207 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
6208 SDLoc dl(Op);
6209 SDValue ExtractSrc = Op.getOperand(0);
6210 EVT VecVT = EVT::getVectorVT(
6211 *DAG.getContext(), DstVT.getScalarType(),
6212 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
6213 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
6214 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
6215 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
6216}
6217
6218/// ExpandBITCAST - If the target supports VFP, this function is called to
6219/// expand a bit convert where either the source or destination type is i64 to
6220/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
6221/// operand type is illegal (e.g., v2f32 for a target that doesn't support
6222/// vectors), since the legalizer won't know what to do with that.
6223SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
6224 const ARMSubtarget *Subtarget) const {
6225 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6226 SDLoc dl(N);
6227 SDValue Op = N->getOperand(0);
6228
6229 // This function is only supposed to be called for i16 and i64 types, either
6230 // as the source or destination of the bit convert.
6231 EVT SrcVT = Op.getValueType();
6232 EVT DstVT = N->getValueType(0);
6233
6234 if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
6235 (DstVT == MVT::f16 || DstVT == MVT::bf16))
6236 return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),
6237 DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));
6238
6239 if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
6240 (SrcVT == MVT::f16 || SrcVT == MVT::bf16))
6241 return DAG.getNode(
6242 ISD::TRUNCATE, SDLoc(N), DstVT,
6243 MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));
6244
6245 if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
6246 return SDValue();
6247
6248 // Turn i64->f64 into VMOVDRR.
6249 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
6250 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
6251 // if we can combine the bitcast with its source.
6252 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
6253 return Val;
6254 SDValue Lo, Hi;
6255 std::tie(Lo, Hi) = DAG.SplitScalar(Op, dl, MVT::i32, MVT::i32);
6256 return DAG.getNode(ISD::BITCAST, dl, DstVT,
6257 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
6258 }
6259
6260 // Turn f64->i64 into VMOVRRD.
6261 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
6262 SDValue Cvt;
6263 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
6264 SrcVT.getVectorNumElements() > 1)
6265 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6266 DAG.getVTList(MVT::i32, MVT::i32),
6267 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
6268 else
6269 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6270 DAG.getVTList(MVT::i32, MVT::i32), Op);
6271 // Merge the pieces into a single i64 value.
6272 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
6273 }
6274
6275 return SDValue();
6276}
6277
6278/// getZeroVector - Returns a vector of specified type with all zero elements.
6279/// Zero vectors are used to represent vector negation and in those cases
6280/// will be implemented with the NEON VNEG instruction. However, VNEG does
6281/// not support i64 elements, so sometimes the zero vectors will need to be
6282/// explicitly constructed. Regardless, use a canonical VMOV to create the
6283/// zero vector.
6284static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
6285 assert(VT.isVector() && "Expected a vector type");
6286 // The canonical modified immediate encoding of a zero vector is....0!
6287 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
6288 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
6289 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
6290 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6291}
6292
6293/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
6294/// i32 values and take a 2 x i32 value to shift plus a shift amount.
6295SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
6296 SelectionDAG &DAG) const {
6297 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6298 EVT VT = Op.getValueType();
6299 unsigned VTBits = VT.getSizeInBits();
6300 SDLoc dl(Op);
6301 SDValue ShOpLo = Op.getOperand(0);
6302 SDValue ShOpHi = Op.getOperand(1);
6303 SDValue ShAmt = Op.getOperand(2);
6304 SDValue ARMcc;
6305 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6306 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
6307
6308 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
6309
6310 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6311 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6312 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
6313 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6314 DAG.getConstant(VTBits, dl, MVT::i32));
6315 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
6316 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6317 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
6318 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6319 ISD::SETGE, ARMcc, DAG, dl);
6320 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
6321 ARMcc, CCR, CmpLo);
6322
6323 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
6324 SDValue HiBigShift = Opc == ISD::SRA
6325 ? DAG.getNode(Opc, dl, VT, ShOpHi,
6326 DAG.getConstant(VTBits - 1, dl, VT))
6327 : DAG.getConstant(0, dl, VT);
6328 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6329 ISD::SETGE, ARMcc, DAG, dl);
6330 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6331 ARMcc, CCR, CmpHi);
6332
6333 SDValue Ops[2] = { Lo, Hi };
6334 return DAG.getMergeValues(Ops, dl);
6335}
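// Illustrative sketch (not part of the upstream code): the scalar algorithm
// behind the CMOV selection above for a 64-bit logical shift right of (Lo, Hi)
// by 0 < Amt < 64. The "small shift" combines bits from both halves; the "big
// shift" (Amt >= 32) reads only the high word.
static inline uint32_t srlPartsLo(uint32_t Lo, uint32_t Hi, unsigned Amt) {
  return Amt < 32 ? (Lo >> Amt) | (Hi << (32 - Amt)) : Hi >> (Amt - 32);
}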
6336
6337/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
6338/// i32 values and take a 2 x i32 value to shift plus a shift amount.
6339SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
6340 SelectionDAG &DAG) const {
6341 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6342 EVT VT = Op.getValueType();
6343 unsigned VTBits = VT.getSizeInBits();
6344 SDLoc dl(Op);
6345 SDValue ShOpLo = Op.getOperand(0);
6346 SDValue ShOpHi = Op.getOperand(1);
6347 SDValue ShAmt = Op.getOperand(2);
6348 SDValue ARMcc;
6349 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6350
6351 assert(Op.getOpcode() == ISD::SHL_PARTS);
6352 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6353 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6354 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
6355 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
6356 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6357
6358 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6359 DAG.getConstant(VTBits, dl, MVT::i32));
6360 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
6361 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6362 ISD::SETGE, ARMcc, DAG, dl);
6363 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6364 ARMcc, CCR, CmpHi);
6365
6366 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6367 ISD::SETGE, ARMcc, DAG, dl);
6368 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
6369 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
6370 DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
6371
6372 SDValue Ops[2] = { Lo, Hi };
6373 return DAG.getMergeValues(Ops, dl);
6374}
6375
6376SDValue ARMTargetLowering::LowerGET_ROUNDING(SDValue Op,
6377 SelectionDAG &DAG) const {
6378 // The rounding mode is in bits 23:22 of the FPSCR.
6379 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
6380 // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3
6381 // so that the shift + and get folded into a bitfield extract.
6382 SDLoc dl(Op);
6383 SDValue Chain = Op.getOperand(0);
6384 SDValue Ops[] = {Chain,
6385 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
6386
6387 SDValue FPSCR =
6388 DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);
6389 Chain = FPSCR.getValue(1);
6390 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
6391 DAG.getConstant(1U << 22, dl, MVT::i32));
6392 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
6393 DAG.getConstant(22, dl, MVT::i32));
6394 SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
6395 DAG.getConstant(3, dl, MVT::i32));
6396 return DAG.getMergeValues({And, Chain}, dl);
6397}
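// Illustrative sketch (not part of the upstream code): the arithmetic the
// nodes above perform on the FPSCR value. The ARM rounding field in bits 23:22
// maps to FLT_ROUNDS as 0->1, 1->2, 2->3, 3->0.
static inline unsigned armFpscrToFltRounds(unsigned Fpscr) {
  return ((Fpscr + (1u << 22)) >> 22) & 3; // e.g. field 3 (RZ) yields 0
}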
6398
6399SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,
6400 SelectionDAG &DAG) const {
6401 SDLoc DL(Op);
6402 SDValue Chain = Op->getOperand(0);
6403 SDValue RMValue = Op->getOperand(1);
6404
6405 // The rounding mode is in bits 23:22 of the FPSCR.
6406 // The llvm.set.rounding argument value to ARM rounding mode value mapping
6407 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
6408 // ((arg - 1) & 3) << 22.
6409 //
6410 // It is expected that the argument of llvm.set.rounding is within the
6411 // segment [0, 3], so NearestTiesToAway (4) is not handled here. It is the
6412 // responsibility of the code that generates llvm.set.rounding to ensure
6413 // this condition.
6414
6415 // Calculate new value of FPSCR[23:22].
6416 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
6417 DAG.getConstant(1, DL, MVT::i32));
6418 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
6419 DAG.getConstant(0x3, DL, MVT::i32));
6420 RMValue = DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
6421 DAG.getConstant(ARM::RoundingBitsPos, DL, MVT::i32));
6422
6423 // Get current value of FPSCR.
6424 SDValue Ops[] = {Chain,
6425 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6426 SDValue FPSCR =
6427 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6428 Chain = FPSCR.getValue(1);
6429 FPSCR = FPSCR.getValue(0);
6430
6431 // Put new rounding mode into FPSCR[23:22].
6432 const unsigned RMMask = ~(ARM::Rounding::rmMask << ARM::RoundingBitsPos);
6433 FPSCR = DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6434 DAG.getConstant(RMMask, DL, MVT::i32));
6435 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCR, RMValue);
6436 SDValue Ops2[] = {
6437 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6438 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6439}
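// Illustrative sketch (not part of the upstream code): the mapping from the
// llvm.set.rounding argument to the FPSCR rounding field computed above,
// i.e. 0->3, 1->0, 2->1, 3->2, placed into bits 23:22.
static inline unsigned fltRoundsToFpscrBits(unsigned Arg) {
  return ((Arg - 1) & 3) << 22; // e.g. Arg == 0 yields field 3 (RZ)
}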
6440
6441SDValue ARMTargetLowering::LowerSET_FPMODE(SDValue Op,
6442 SelectionDAG &DAG) const {
6443 SDLoc DL(Op);
6444 SDValue Chain = Op->getOperand(0);
6445 SDValue Mode = Op->getOperand(1);
6446
6447 // Generate nodes to build:
6448 // FPSCR = (FPSCR & FPStatusBits) | (Mode & ~FPStatusBits)
6449 SDValue Ops[] = {Chain,
6450 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6451 SDValue FPSCR =
6452 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6453 Chain = FPSCR.getValue(1);
6454 FPSCR = FPSCR.getValue(0);
6455
6456 SDValue FPSCRMasked =
6457 DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6458 DAG.getConstant(ARM::FPStatusBits, DL, MVT::i32));
6459 SDValue InputMasked =
6460 DAG.getNode(ISD::AND, DL, MVT::i32, Mode,
6461 DAG.getConstant(~ARM::FPStatusBits, DL, MVT::i32));
6462 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCRMasked, InputMasked);
6463
6464 SDValue Ops2[] = {
6465 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6466 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6467}
6468
6469SDValue ARMTargetLowering::LowerRESET_FPMODE(SDValue Op,
6470 SelectionDAG &DAG) const {
6471 SDLoc DL(Op);
6472 SDValue Chain = Op->getOperand(0);
6473
6474 // To get the default FP mode all control bits are cleared:
6475 // FPSCR = FPSCR & (FPStatusBits | FPReservedBits)
6476 SDValue Ops[] = {Chain,
6477 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6478 SDValue FPSCR =
6479 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6480 Chain = FPSCR.getValue(1);
6481 FPSCR = FPSCR.getValue(0);
6482
6483 SDValue FPSCRMasked = DAG.getNode(
6484 ISD::AND, DL, MVT::i32, FPSCR,
6485 DAG.getConstant(ARM::FPStatusBits | ARM::FPReservedBits, DL, MVT::i32));
6486 SDValue Ops2[] = {Chain,
6487 DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32),
6488 FPSCRMasked};
6489 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6490}
6491 
6492 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
6493 const ARMSubtarget *ST) {
6494 SDLoc dl(N);
6495 EVT VT = N->getValueType(0);
6496 if (VT.isVector() && ST->hasNEON()) {
6497
6498 // Compute the least significant set bit: LSB = X & -X
6499 SDValue X = N->getOperand(0);
6500 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
6501 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
6502
6503 EVT ElemTy = VT.getVectorElementType();
6504
6505 if (ElemTy == MVT::i8) {
6506 // Compute with: cttz(x) = ctpop(lsb - 1)
6507 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6508 DAG.getTargetConstant(1, dl, ElemTy));
6509 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6510 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6511 }
6512
6513 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
6514 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
6515 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
6516 unsigned NumBits = ElemTy.getSizeInBits();
6517 SDValue WidthMinus1 =
6518 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6519 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
6520 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
6521 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
6522 }
6523
6524 // Compute with: cttz(x) = ctpop(lsb - 1)
6525
6526 // Compute LSB - 1.
6527 SDValue Bits;
6528 if (ElemTy == MVT::i64) {
6529 // Load constant 0xffff'ffff'ffff'ffff to register.
6530 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6531 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
6532 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
6533 } else {
6534 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6535 DAG.getTargetConstant(1, dl, ElemTy));
6536 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6537 }
6538 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6539 }
6540
6541 if (!ST->hasV6T2Ops())
6542 return SDValue();
6543
6544 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
6545 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
6546}
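// Illustrative sketch (not part of the upstream code): the scalar identity the
// vector lowering above uses. Isolating the lowest set bit with lsb = x & -x,
// cttz(x) equals popcount(lsb - 1), and for non-zero x it also equals
// (width - 1) - ctlz(lsb).
static inline unsigned cttz32ViaPopcount(uint32_t X) {
  uint32_t Lsb = X & (0u - X);
  unsigned N = 0;
  for (uint32_t Bits = Lsb - 1; Bits; Bits >>= 1)
    N += Bits & 1; // popcount(Lsb - 1); X == 0 yields 32
  return N;
}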
6547 
6548 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
6549 const ARMSubtarget *ST) {
6550 EVT VT = N->getValueType(0);
6551 SDLoc DL(N);
6552
6553 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
6554 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
6555 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
6556 "Unexpected type for custom ctpop lowering");
6557
6558 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6559 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
6560 SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
6561 Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
6562
6563 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
6564 unsigned EltSize = 8;
6565 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
6566 while (EltSize != VT.getScalarSizeInBits()) {
6567 SmallVector<SDValue, 8> Ops;
6568 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
6569 TLI.getPointerTy(DAG.getDataLayout())));
6570 Ops.push_back(Res);
6571
6572 EltSize *= 2;
6573 NumElts /= 2;
6574 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
6575 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
6576 }
6577
6578 return Res;
6579}
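// Illustrative sketch (not part of the upstream code): one step of the
// pairwise-add widening above. Each vpaddl.u8 / vpaddl.u16 adds adjacent
// lanes, doubling the element size and halving the element count, so the
// byte-wise popcount is folded up to the requested element width.
static inline void pairwiseAddU8(const uint8_t In[8], uint16_t Out[4]) {
  for (unsigned I = 0; I != 4; ++I)
    Out[I] = uint16_t(In[2 * I]) + In[2 * I + 1];
}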
6580
6581 /// getVShiftImm - Check if this is a valid build_vector for the immediate
6582/// operand of a vector shift operation, where all the elements of the
6583/// build_vector must have the same constant integer value.
6584static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
6585 // Ignore bit_converts.
6586 while (Op.getOpcode() == ISD::BITCAST)
6587 Op = Op.getOperand(0);
6588 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6589 APInt SplatBits, SplatUndef;
6590 unsigned SplatBitSize;
6591 bool HasAnyUndefs;
6592 if (!BVN ||
6593 !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6594 ElementBits) ||
6595 SplatBitSize > ElementBits)
6596 return false;
6597 Cnt = SplatBits.getSExtValue();
6598 return true;
6599}
6600
6601/// isVShiftLImm - Check if this is a valid build_vector for the immediate
6602/// operand of a vector shift left operation. That value must be in the range:
6603/// 0 <= Value < ElementBits for a left shift; or
6604/// 0 <= Value <= ElementBits for a long left shift.
6605static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
6606 assert(VT.isVector() && "vector shift count is not a vector type");
6607 int64_t ElementBits = VT.getScalarSizeInBits();
6608 if (!getVShiftImm(Op, ElementBits, Cnt))
6609 return false;
6610 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6611}
6612
6613/// isVShiftRImm - Check if this is a valid build_vector for the immediate
6614/// operand of a vector shift right operation. For a shift opcode, the value
6615/// is positive, but for an intrinsic the value count must be negative. The
6616/// absolute value must be in the range:
6617/// 1 <= |Value| <= ElementBits for a right shift; or
6618/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
6619static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
6620 int64_t &Cnt) {
6621 assert(VT.isVector() && "vector shift count is not a vector type");
6622 int64_t ElementBits = VT.getScalarSizeInBits();
6623 if (!getVShiftImm(Op, ElementBits, Cnt))
6624 return false;
6625 if (!isIntrinsic)
6626 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6627 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6628 Cnt = -Cnt;
6629 return true;
6630 }
6631 return false;
6632}
6633 
6634 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
6635 const ARMSubtarget *ST) {
6636 EVT VT = N->getValueType(0);
6637 SDLoc dl(N);
6638 int64_t Cnt;
6639
6640 if (!VT.isVector())
6641 return SDValue();
6642
6643 // We essentially have two forms here. Shift by an immediate and shift by a
6644 // vector register (there are also shift by a gpr, but that is just handled
6645 // with a tablegen pattern). We cannot easily match shift by an immediate in
6646 // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
6647 // For shifting by a vector, we don't have VSHR, only VSHL (which can be
6648 // signed or unsigned, and a negative shift indicates a shift right).
6649 if (N->getOpcode() == ISD::SHL) {
6650 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
6651 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
6652 DAG.getConstant(Cnt, dl, MVT::i32));
6653 return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
6654 N->getOperand(1));
6655 }
6656
6657 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
6658 "unexpected vector shift opcode");
6659
6660 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
6661 unsigned VShiftOpc =
6662 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6663 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
6664 DAG.getConstant(Cnt, dl, MVT::i32));
6665 }
6666
6667 // We don't have operations for other right shifts, so emit a shift left by a
6668 // negated amount instead.
6669 EVT ShiftVT = N->getOperand(1).getValueType();
6670 SDValue NegatedCount = DAG.getNode(
6671 ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
6672 unsigned VShiftOpc =
6673 (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6674 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
6675}
6676 
6677 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
6678 const ARMSubtarget *ST) {
6679 EVT VT = N->getValueType(0);
6680 SDLoc dl(N);
6681
6682 // We can get here for a node like i32 = ISD::SHL i32, i64
6683 if (VT != MVT::i64)
6684 return SDValue();
6685
6686 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
6687 N->getOpcode() == ISD::SHL) &&
6688 "Unknown shift to lower!");
6689
6690 unsigned ShOpc = N->getOpcode();
6691 if (ST->hasMVEIntegerOps()) {
6692 SDValue ShAmt = N->getOperand(1);
6693 unsigned ShPartsOpc = ARMISD::LSLL;
6694 ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
6695
6696 // If the shift amount is zero or at least 32 (for a constant amount), or its
6697 // type is wider than 64 bits, fall back to the default expansion.
6698 if ((!Con && ShAmt->getValueType(0).getSizeInBits() > 64) ||
6699 (Con && (Con->getAPIntValue() == 0 || Con->getAPIntValue().uge(32))))
6700 return SDValue();
6701
6702 // Extract the lower 32 bits of the shift amount if it's not an i32
6703 if (ShAmt->getValueType(0) != MVT::i32)
6704 ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);
6705
6706 if (ShOpc == ISD::SRL) {
6707 if (!Con)
6708 // There is no t2LSRLr instruction so negate and perform an lsll if the
6709 // shift amount is in a register, emulating a right shift.
6710 ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6711 DAG.getConstant(0, dl, MVT::i32), ShAmt);
6712 else
6713 // Else generate an lsrl on the immediate shift amount
6714 ShPartsOpc = ARMISD::LSRL;
6715 } else if (ShOpc == ISD::SRA)
6716 ShPartsOpc = ARMISD::ASRL;
6717
6718 // Split Lower/Upper 32 bits of the destination/source
6719 SDValue Lo, Hi;
6720 std::tie(Lo, Hi) =
6721 DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6722 // Generate the shift operation as computed above
6723 Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
6724 ShAmt);
6725 // The upper 32 bits come from the second return value of lsll
6726 Hi = SDValue(Lo.getNode(), 1);
6727 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6728 }
6729
6730 // We only lower SRA, SRL of 1 here, all others use generic lowering.
6731 if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
6732 return SDValue();
6733
6734 // If we are in thumb mode, we don't have RRX.
6735 if (ST->isThumb1Only())
6736 return SDValue();
6737
6738 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
6739 SDValue Lo, Hi;
6740 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6741
6742 // First, build a SRA_GLUE/SRL_GLUE op, which shifts the top part by one and
6743 // captures the result into a carry flag.
6744 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE;
6745 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
6746
6747 // The low part is an ARMISD::RRX operand, which shifts the carry in.
6748 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
6749
6750 // Merge the pieces into a single i64 value.
6751 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6752}
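// Illustrative sketch (not part of the upstream code): the shift-by-one
// expansion above in scalar form. Shifting the high word right by one pushes
// its low bit into the carry flag (SRL_GLUE/SRA_GLUE); RRX then rotates that
// carry into the top of the low word.
static inline uint64_t srl64ByOne(uint32_t Lo, uint32_t Hi) {
  uint32_t Carry = Hi & 1;
  uint32_t NewLo = (Lo >> 1) | (Carry << 31); // RRX
  uint32_t NewHi = Hi >> 1;                   // SRL_GLUE
  return (uint64_t(NewHi) << 32) | NewLo;
}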
6753 
6754 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
6755 const ARMSubtarget *ST) {
6756 bool Invert = false;
6757 bool Swap = false;
6758 unsigned Opc = ARMCC::AL;
6759
6760 SDValue Op0 = Op.getOperand(0);
6761 SDValue Op1 = Op.getOperand(1);
6762 SDValue CC = Op.getOperand(2);
6763 EVT VT = Op.getValueType();
6764 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
6765 SDLoc dl(Op);
6766
6767 EVT CmpVT;
6768 if (ST->hasNEON())
6769 CmpVT = VT.changeVectorElementTypeToInteger();
6770 else {
6771 assert(ST->hasMVEIntegerOps() &&
6772 "No hardware support for integer vector comparison!");
6773
6774 if (Op.getValueType().getVectorElementType() != MVT::i1)
6775 return SDValue();
6776
6777 // Make sure we expand floating point setcc to scalar if we do not have
6778 // mve.fp, so that we can handle them from there.
6779 if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
6780 return SDValue();
6781
6782 CmpVT = VT;
6783 }
6784
6785 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
6786 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
6787 // Special-case integer 64-bit equality comparisons. They aren't legal,
6788 // but they can be lowered with a few vector instructions.
6789 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
6790 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
6791 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
6792 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
6793 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
6794 DAG.getCondCode(ISD::SETEQ));
6795 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
6796 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
6797 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
6798 if (SetCCOpcode == ISD::SETNE)
6799 Merged = DAG.getNOT(dl, Merged, CmpVT);
6800 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
6801 return Merged;
6802 }
6803
6804 if (CmpVT.getVectorElementType() == MVT::i64)
6805 // 64-bit comparisons are not legal in general.
6806 return SDValue();
6807
6808 if (Op1.getValueType().isFloatingPoint()) {
6809 switch (SetCCOpcode) {
6810 default: llvm_unreachable("Illegal FP comparison");
6811 case ISD::SETUNE:
6812 case ISD::SETNE:
6813 if (ST->hasMVEFloatOps()) {
6814 Opc = ARMCC::NE; break;
6815 } else {
6816 Invert = true; [[fallthrough]];
6817 }
6818 case ISD::SETOEQ:
6819 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6820 case ISD::SETOLT:
6821 case ISD::SETLT: Swap = true; [[fallthrough]];
6822 case ISD::SETOGT:
6823 case ISD::SETGT: Opc = ARMCC::GT; break;
6824 case ISD::SETOLE:
6825 case ISD::SETLE: Swap = true; [[fallthrough]];
6826 case ISD::SETOGE:
6827 case ISD::SETGE: Opc = ARMCC::GE; break;
6828 case ISD::SETUGE: Swap = true; [[fallthrough]];
6829 case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;
6830 case ISD::SETUGT: Swap = true; [[fallthrough]];
6831 case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;
6832 case ISD::SETUEQ: Invert = true; [[fallthrough]];
6833 case ISD::SETONE: {
6834 // Expand this to (OLT | OGT).
6835 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6836 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6837 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6838 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6839 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6840 if (Invert)
6841 Result = DAG.getNOT(dl, Result, VT);
6842 return Result;
6843 }
6844 case ISD::SETUO: Invert = true; [[fallthrough]];
6845 case ISD::SETO: {
6846 // Expand this to (OLT | OGE).
6847 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6848 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6849 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6850 DAG.getConstant(ARMCC::GE, dl, MVT::i32));
6851 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6852 if (Invert)
6853 Result = DAG.getNOT(dl, Result, VT);
6854 return Result;
6855 }
6856 }
6857 } else {
6858 // Integer comparisons.
6859 switch (SetCCOpcode) {
6860 default: llvm_unreachable("Illegal integer comparison");
6861 case ISD::SETNE:
6862 if (ST->hasMVEIntegerOps()) {
6863 Opc = ARMCC::NE; break;
6864 } else {
6865 Invert = true; [[fallthrough]];
6866 }
6867 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6868 case ISD::SETLT: Swap = true; [[fallthrough]];
6869 case ISD::SETGT: Opc = ARMCC::GT; break;
6870 case ISD::SETLE: Swap = true; [[fallthrough]];
6871 case ISD::SETGE: Opc = ARMCC::GE; break;
6872 case ISD::SETULT: Swap = true; [[fallthrough]];
6873 case ISD::SETUGT: Opc = ARMCC::HI; break;
6874 case ISD::SETULE: Swap = true; [[fallthrough]];
6875 case ISD::SETUGE: Opc = ARMCC::HS; break;
6876 }
6877
6878 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
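// At this point Opc == EQ means the original condition was either SETEQ, or
// SETNE with Invert set. VTST itself computes the "ne" form, so the result is
// negated below exactly when the original condition was an equality.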
6879 if (ST->hasNEON() && Opc == ARMCC::EQ) {
6880 SDValue AndOp;
6881 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
6882 AndOp = Op0;
6883 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
6884 AndOp = Op1;
6885
6886 // Ignore bitconvert.
6887 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
6888 AndOp = AndOp.getOperand(0);
6889
6890 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
6891 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
6892 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
6893 SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
6894 if (!Invert)
6895 Result = DAG.getNOT(dl, Result, VT);
6896 return Result;
6897 }
6898 }
6899 }
6900
6901 if (Swap)
6902 std::swap(Op0, Op1);
6903
6904 // If one of the operands is a constant vector zero, attempt to fold the
6905 // comparison to a specialized compare-against-zero form.
6906 if (ISD::isBuildVectorAllZeros(Op0.getNode()) &&
6907 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ ||
6908 Opc == ARMCC::NE)) {
6909 if (Opc == ARMCC::GE)
6910 Opc = ARMCC::LE;
6911 else if (Opc == ARMCC::GT)
6912 Opc = ARMCC::LT;
6913 std::swap(Op0, Op1);
6914 }
6915
6916 SDValue Result;
6917 if (ISD::isBuildVectorAllZeros(Op1.getNode()) &&
6918 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE ||
6919 Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ))
6920 Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0,
6921 DAG.getConstant(Opc, dl, MVT::i32));
6922 else
6923 Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6924 DAG.getConstant(Opc, dl, MVT::i32));
6925
6926 Result = DAG.getSExtOrTrunc(Result, dl, VT);
6927
6928 if (Invert)
6929 Result = DAG.getNOT(dl, Result, VT);
6930
6931 return Result;
6932}
6933
6934 static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
6935 SDValue LHS = Op.getOperand(0);
6936 SDValue RHS = Op.getOperand(1);
6937 SDValue Carry = Op.getOperand(2);
6938 SDValue Cond = Op.getOperand(3);
6939 SDLoc DL(Op);
6940
6941 assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
6942
6943 // ARMISD::SUBE expects a carry, not a borrow as ISD::USUBO_CARRY uses, so we
6944 // have to invert the carry first.
6945 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
6946 DAG.getConstant(1, DL, MVT::i32), Carry);
6947 // This converts the boolean value carry into the carry flag.
6948 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
6949
6950 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
6951 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
6952
6953 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
6954 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
6955 SDValue ARMcc = DAG.getConstant(
6956 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
6957 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6958 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
6959 Cmp.getValue(1), SDValue());
6960 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
6961 CCR, Chain.getValue(1));
6962}
6963
6964/// isVMOVModifiedImm - Check if the specified splat value corresponds to a
6965/// valid vector constant for a NEON or MVE instruction with a "modified
6966/// immediate" operand (e.g., VMOV). If so, return the encoded value.
6967static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
6968 unsigned SplatBitSize, SelectionDAG &DAG,
6969 const SDLoc &dl, EVT &VT, EVT VectorVT,
6970 VMOVModImmType type) {
6971 unsigned OpCmode, Imm;
6972 bool is128Bits = VectorVT.is128BitVector();
6973
6974 // SplatBitSize is set to the smallest size that splats the vector, so a
6975 // zero vector will always have SplatBitSize == 8. However, NEON modified
6976 // immediate instructions other than VMOV do not support the 8-bit encoding
6977 // of a zero vector, and the default encoding of zero is supposed to be the
6978 // 32-bit version.
6979 if (SplatBits == 0)
6980 SplatBitSize = 32;
6981
6982 switch (SplatBitSize) {
6983 case 8:
6984 if (type != VMOVModImm)
6985 return SDValue();
6986 // Any 1-byte value is OK. Op=0, Cmode=1110.
6987 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
6988 OpCmode = 0xe;
6989 Imm = SplatBits;
6990 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
6991 break;
6992
6993 case 16:
6994 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
6995 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
6996 if ((SplatBits & ~0xff) == 0) {
6997 // Value = 0x00nn: Op=x, Cmode=100x.
6998 OpCmode = 0x8;
6999 Imm = SplatBits;
7000 break;
7001 }
7002 if ((SplatBits & ~0xff00) == 0) {
7003 // Value = 0xnn00: Op=x, Cmode=101x.
7004 OpCmode = 0xa;
7005 Imm = SplatBits >> 8;
7006 break;
7007 }
7008 return SDValue();
7009
7010 case 32:
7011 // NEON's 32-bit VMOV supports splat values where:
7012 // * only one byte is nonzero, or
7013 // * the least significant byte is 0xff and the second byte is nonzero, or
7014 // * the least significant 2 bytes are 0xff and the third is nonzero.
7015 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
7016 if ((SplatBits & ~0xff) == 0) {
7017 // Value = 0x000000nn: Op=x, Cmode=000x.
7018 OpCmode = 0;
7019 Imm = SplatBits;
7020 break;
7021 }
7022 if ((SplatBits & ~0xff00) == 0) {
7023 // Value = 0x0000nn00: Op=x, Cmode=001x.
7024 OpCmode = 0x2;
7025 Imm = SplatBits >> 8;
7026 break;
7027 }
7028 if ((SplatBits & ~0xff0000) == 0) {
7029 // Value = 0x00nn0000: Op=x, Cmode=010x.
7030 OpCmode = 0x4;
7031 Imm = SplatBits >> 16;
7032 break;
7033 }
7034 if ((SplatBits & ~0xff000000) == 0) {
7035 // Value = 0xnn000000: Op=x, Cmode=011x.
7036 OpCmode = 0x6;
7037 Imm = SplatBits >> 24;
7038 break;
7039 }
7040
7041 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
7042 if (type == OtherModImm) return SDValue();
7043
7044 if ((SplatBits & ~0xffff) == 0 &&
7045 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
7046 // Value = 0x0000nnff: Op=x, Cmode=1100.
7047 OpCmode = 0xc;
7048 Imm = SplatBits >> 8;
7049 break;
7050 }
7051
7052 // cmode == 0b1101 is not supported for MVE VMVN
7053 if (type == MVEVMVNModImm)
7054 return SDValue();
7055
7056 if ((SplatBits & ~0xffffff) == 0 &&
7057 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
7058 // Value = 0x00nnffff: Op=x, Cmode=1101.
7059 OpCmode = 0xd;
7060 Imm = SplatBits >> 16;
7061 break;
7062 }
7063
7064 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
7065 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
7066 // VMOV.I32. A (very) minor optimization would be to replicate the value
7067 // and fall through here to test for a valid 64-bit splat. But, then the
7068 // caller would also need to check and handle the change in size.
7069 return SDValue();
7070
7071 case 64: {
7072 if (type != VMOVModImm)
7073 return SDValue();
7074 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
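// For example, a splat of 0x00ff00ff00ff00ff gives Imm == 0x55: one bit per
// 0xff byte, least-significant byte first (before any big-endian adjustment
// below).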
7075 uint64_t BitMask = 0xff;
7076 unsigned ImmMask = 1;
7077 Imm = 0;
7078 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
7079 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
7080 Imm |= ImmMask;
7081 } else if ((SplatBits & BitMask) != 0) {
7082 return SDValue();
7083 }
7084 BitMask <<= 8;
7085 ImmMask <<= 1;
7086 }
7087
7088 if (DAG.getDataLayout().isBigEndian()) {
7089 // Reverse the order of elements within the vector.
7090 unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
7091 unsigned Mask = (1 << BytesPerElem) - 1;
7092 unsigned NumElems = 8 / BytesPerElem;
7093 unsigned NewImm = 0;
7094 for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
7095 unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
7096 NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
7097 }
7098 Imm = NewImm;
7099 }
7100
7101 // Op=1, Cmode=1110.
7102 OpCmode = 0x1e;
7103 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
7104 break;
7105 }
7106
7107 default:
7108 llvm_unreachable("unexpected size for isVMOVModifiedImm");
7109 }
7110
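// createVMOVModImm packs the two fields into a single integer encoding, with
// OpCmode occupying the bits above the 8-bit Imm field.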
7111 unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm);
7112 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
7113}
7114
7115SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
7116 const ARMSubtarget *ST) const {
7117 EVT VT = Op.getValueType();
7118 bool IsDouble = (VT == MVT::f64);
7119 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
7120 const APFloat &FPVal = CFP->getValueAPF();
7121
7122 // Prevent floating-point constants from using literal loads
7123 // when execute-only is enabled.
7124 if (ST->genExecuteOnly()) {
7125 // We shouldn't trigger this for v6m execute-only
7126 assert((!ST->isThumb1Only() || ST->hasV8MBaselineOps()) &&
7127 "Unexpected architecture");
7128
7129 // If we can represent the constant as an immediate, don't lower it
7130 if (isFPImmLegal(FPVal, VT))
7131 return Op;
7132 // Otherwise, construct as integer, and move to float register
7133 APInt INTVal = FPVal.bitcastToAPInt();
7134 SDLoc DL(CFP);
7135 switch (VT.getSimpleVT().SimpleTy) {
7136 default:
7137 llvm_unreachable("Unknown floating point type!");
7138 break;
7139 case MVT::f64: {
7140 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
7141 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
7142 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
7143 }
7144 case MVT::f32:
7145 return DAG.getNode(ARMISD::VMOVSR, DL, VT,
7146 DAG.getConstant(INTVal, DL, MVT::i32));
7147 }
7148 }
7149
7150 if (!ST->hasVFP3Base())
7151 return SDValue();
7152
7153 // Use the default (constant pool) lowering for double constants when we have
7154 // an SP-only FPU
7155 if (IsDouble && !Subtarget->hasFP64())
7156 return SDValue();
7157
7158 // Try splatting with a VMOV.f32...
7159 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
7160
7161 if (ImmVal != -1) {
7162 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
7163 // We have code in place to select a valid ConstantFP already, no need to
7164 // do any mangling.
7165 return Op;
7166 }
7167
7168 // It's a float and we are trying to use NEON operations where
7169 // possible. Lower it to a splat followed by an extract.
7170 SDLoc DL(Op);
7171 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
7172 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
7173 NewVal);
7174 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
7175 DAG.getConstant(0, DL, MVT::i32));
7176 }
7177
7178 // The rest of our options are NEON only; make sure that's allowed before
7179 // proceeding.
7180 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
7181 return SDValue();
7182
7183 EVT VMovVT;
7184 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
7185
7186 // It wouldn't really be worth bothering for doubles except for one very
7187 // important value, which does happen to match: 0.0. So make sure we don't do
7188 // anything stupid.
7189 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
7190 return SDValue();
7191
7192 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
7193 SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
7194 VMovVT, VT, VMOVModImm);
7195 if (NewVal != SDValue()) {
7196 SDLoc DL(Op);
7197 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
7198 NewVal);
7199 if (IsDouble)
7200 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7201
7202 // It's a float: cast and extract a vector element.
7203 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7204 VecConstant);
7205 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7206 DAG.getConstant(0, DL, MVT::i32));
7207 }
7208
7209 // Finally, try a VMVN.i32
7210 NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
7211 VT, VMVNModImm);
7212 if (NewVal != SDValue()) {
7213 SDLoc DL(Op);
7214 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
7215
7216 if (IsDouble)
7217 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7218
7219 // It's a float: cast and extract a vector element.
7220 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7221 VecConstant);
7222 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7223 DAG.getConstant(0, DL, MVT::i32));
7224 }
7225
7226 return SDValue();
7227}
7228
7229 // Check whether a VEXT instruction can handle the shuffle mask when both
7230 // vector sources of the shuffle are the same.
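// e.g. for v8i8, the mask <3, 4, 5, 6, 7, 0, 1, 2> is such a VEXT with Imm == 3.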
7231static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
7232 unsigned NumElts = VT.getVectorNumElements();
7233
7234 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7235 if (M[0] < 0)
7236 return false;
7237
7238 Imm = M[0];
7239
7240 // If this is a VEXT shuffle, the immediate value is the index of the first
7241 // element. The other shuffle indices must be the successive elements after
7242 // the first one.
7243 unsigned ExpectedElt = Imm;
7244 for (unsigned i = 1; i < NumElts; ++i) {
7245 // Increment the expected index. If it wraps around, just follow it
7246 // back to index zero and keep going.
7247 ++ExpectedElt;
7248 if (ExpectedElt == NumElts)
7249 ExpectedElt = 0;
7250
7251 if (M[i] < 0) continue; // ignore UNDEF indices
7252 if (ExpectedElt != static_cast<unsigned>(M[i]))
7253 return false;
7254 }
7255
7256 return true;
7257}
7258
7259static bool isVEXTMask(ArrayRef<int> M, EVT VT,
7260 bool &ReverseVEXT, unsigned &Imm) {
7261 unsigned NumElts = VT.getVectorNumElements();
7262 ReverseVEXT = false;
7263
7264 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7265 if (M[0] < 0)
7266 return false;
7267
7268 Imm = M[0];
7269
7270 // If this is a VEXT shuffle, the immediate value is the index of the first
7271 // element. The other shuffle indices must be the successive elements after
7272 // the first one.
7273 unsigned ExpectedElt = Imm;
7274 for (unsigned i = 1; i < NumElts; ++i) {
7275 // Increment the expected index. If it wraps around, it may still be
7276 // a VEXT but the source vectors must be swapped.
7277 ExpectedElt += 1;
7278 if (ExpectedElt == NumElts * 2) {
7279 ExpectedElt = 0;
7280 ReverseVEXT = true;
7281 }
7282
7283 if (M[i] < 0) continue; // ignore UNDEF indices
7284 if (ExpectedElt != static_cast<unsigned>(M[i]))
7285 return false;
7286 }
7287
7288 // Adjust the index value if the source operands will be swapped.
7289 if (ReverseVEXT)
7290 Imm -= NumElts;
7291
7292 return true;
7293}
7294
7295static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
7296 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
7297 // range, then 0 is placed into the resulting vector. So pretty much any mask
7298 // of 8 elements can work here.
7299 return VT == MVT::v8i8 && M.size() == 8;
7300}
7301
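// Return which of the two results (0 or 1) a shuffle mask segment describes:
// for a double-length mask this is the half that Index falls into, otherwise
// it is 0 when the mask element at Index is 0 and 1 otherwise.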
7302static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
7303 unsigned Index) {
7304 if (Mask.size() == Elements * 2)
7305 return Index / Elements;
7306 return Mask[Index] == 0 ? 0 : 1;
7307}
7308
7309// Checks whether the shuffle mask represents a vector transpose (VTRN) by
7310// checking that pairs of elements in the shuffle mask represent the same index
7311// in each vector, incrementing the expected index by 2 at each step.
7312// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
7313// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
7314// v2={e,f,g,h}
7315// WhichResult gives the offset for each element in the mask based on which
7316// of the two results it belongs to.
7317//
7318// The transpose can be represented either as:
7319// result1 = shufflevector v1, v2, result1_shuffle_mask
7320// result2 = shufflevector v1, v2, result2_shuffle_mask
7321// where v1/v2 and the shuffle masks have the same number of elements
7322// (here WhichResult (see below) indicates which result is being checked)
7323//
7324// or as:
7325// results = shufflevector v1, v2, shuffle_mask
7326 // where both results are returned in one vector and the shuffle mask has twice
7327 // as many elements as v1/v2 (in this case WhichResult will always be 0 when the
7328 // mask matches). Here we want to check the low half and the high half of the
7329 // shuffle mask as if it were the other case.
7330static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7331 unsigned EltSz = VT.getScalarSizeInBits();
7332 if (EltSz == 64)
7333 return false;
7334
7335 unsigned NumElts = VT.getVectorNumElements();
7336 if (M.size() != NumElts && M.size() != NumElts*2)
7337 return false;
7338
7339 // If the mask is twice as long as the input vector then we need to check the
7340 // upper and lower parts of the mask with a matching value for WhichResult.
7341 // FIXME: A mask with only even values will be rejected in case the first
7342 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
7343 // M[0] is used to determine WhichResult
7344 for (unsigned i = 0; i < M.size(); i += NumElts) {
7345 WhichResult = SelectPairHalf(NumElts, M, i);
7346 for (unsigned j = 0; j < NumElts; j += 2) {
7347 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7348 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
7349 return false;
7350 }
7351 }
7352
7353 if (M.size() == NumElts*2)
7354 WhichResult = 0;
7355
7356 return true;
7357}
7358
7359/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
7360/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7361/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
7362static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7363 unsigned EltSz = VT.getScalarSizeInBits();
7364 if (EltSz == 64)
7365 return false;
7366
7367 unsigned NumElts = VT.getVectorNumElements();
7368 if (M.size() != NumElts && M.size() != NumElts*2)
7369 return false;
7370
7371 for (unsigned i = 0; i < M.size(); i += NumElts) {
7372 WhichResult = SelectPairHalf(NumElts, M, i);
7373 for (unsigned j = 0; j < NumElts; j += 2) {
7374 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7375 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
7376 return false;
7377 }
7378 }
7379
7380 if (M.size() == NumElts*2)
7381 WhichResult = 0;
7382
7383 return true;
7384}
7385
7386// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
7387// that the mask elements are either all even and in steps of size 2 or all odd
7388// and in steps of size 2.
7389// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
7390// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
7391// v2={e,f,g,h}
7392 // Requires similar checks to those of isVTRNMask with
7393 // respect to how the results are returned.
7394static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7395 unsigned EltSz = VT.getScalarSizeInBits();
7396 if (EltSz == 64)
7397 return false;
7398
7399 unsigned NumElts = VT.getVectorNumElements();
7400 if (M.size() != NumElts && M.size() != NumElts*2)
7401 return false;
7402
7403 for (unsigned i = 0; i < M.size(); i += NumElts) {
7404 WhichResult = SelectPairHalf(NumElts, M, i);
7405 for (unsigned j = 0; j < NumElts; ++j) {
7406 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
7407 return false;
7408 }
7409 }
7410
7411 if (M.size() == NumElts*2)
7412 WhichResult = 0;
7413
7414 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7415 if (VT.is64BitVector() && EltSz == 32)
7416 return false;
7417
7418 return true;
7419}
7420
7421/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
7422/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7423 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
7424static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7425 unsigned EltSz = VT.getScalarSizeInBits();
7426 if (EltSz == 64)
7427 return false;
7428
7429 unsigned NumElts = VT.getVectorNumElements();
7430 if (M.size() != NumElts && M.size() != NumElts*2)
7431 return false;
7432
7433 unsigned Half = NumElts / 2;
7434 for (unsigned i = 0; i < M.size(); i += NumElts) {
7435 WhichResult = SelectPairHalf(NumElts, M, i);
7436 for (unsigned j = 0; j < NumElts; j += Half) {
7437 unsigned Idx = WhichResult;
7438 for (unsigned k = 0; k < Half; ++k) {
7439 int MIdx = M[i + j + k];
7440 if (MIdx >= 0 && (unsigned) MIdx != Idx)
7441 return false;
7442 Idx += 2;
7443 }
7444 }
7445 }
7446
7447 if (M.size() == NumElts*2)
7448 WhichResult = 0;
7449
7450 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7451 if (VT.is64BitVector() && EltSz == 32)
7452 return false;
7453
7454 return true;
7455}
7456
7457// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
7458// that pairs of elements of the shufflemask represent the same index in each
7459// vector incrementing sequentially through the vectors.
7460// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
7461// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
7462// v2={e,f,g,h}
7463 // Requires similar checks to those of isVTRNMask with respect to how the
7464 // results are returned.
7465static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7466 unsigned EltSz = VT.getScalarSizeInBits();
7467 if (EltSz == 64)
7468 return false;
7469
7470 unsigned NumElts = VT.getVectorNumElements();
7471 if (M.size() != NumElts && M.size() != NumElts*2)
7472 return false;
7473
7474 for (unsigned i = 0; i < M.size(); i += NumElts) {
7475 WhichResult = SelectPairHalf(NumElts, M, i);
7476 unsigned Idx = WhichResult * NumElts / 2;
7477 for (unsigned j = 0; j < NumElts; j += 2) {
7478 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7479 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
7480 return false;
7481 Idx += 1;
7482 }
7483 }
7484
7485 if (M.size() == NumElts*2)
7486 WhichResult = 0;
7487
7488 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7489 if (VT.is64BitVector() && EltSz == 32)
7490 return false;
7491
7492 return true;
7493}
7494
7495/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
7496/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7497/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
7498static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7499 unsigned EltSz = VT.getScalarSizeInBits();
7500 if (EltSz == 64)
7501 return false;
7502
7503 unsigned NumElts = VT.getVectorNumElements();
7504 if (M.size() != NumElts && M.size() != NumElts*2)
7505 return false;
7506
7507 for (unsigned i = 0; i < M.size(); i += NumElts) {
7508 WhichResult = SelectPairHalf(NumElts, M, i);
7509 unsigned Idx = WhichResult * NumElts / 2;
7510 for (unsigned j = 0; j < NumElts; j += 2) {
7511 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7512 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
7513 return false;
7514 Idx += 1;
7515 }
7516 }
7517
7518 if (M.size() == NumElts*2)
7519 WhichResult = 0;
7520
7521 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7522 if (VT.is64BitVector() && EltSz == 32)
7523 return false;
7524
7525 return true;
7526}
7527
7528/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
7529/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
7530static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
7531 unsigned &WhichResult,
7532 bool &isV_UNDEF) {
7533 isV_UNDEF = false;
7534 if (isVTRNMask(ShuffleMask, VT, WhichResult))
7535 return ARMISD::VTRN;
7536 if (isVUZPMask(ShuffleMask, VT, WhichResult))
7537 return ARMISD::VUZP;
7538 if (isVZIPMask(ShuffleMask, VT, WhichResult))
7539 return ARMISD::VZIP;
7540
7541 isV_UNDEF = true;
7542 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
7543 return ARMISD::VTRN;
7544 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7545 return ARMISD::VUZP;
7546 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7547 return ARMISD::VZIP;
7548
7549 return 0;
7550}
7551
7552 /// \return true if this is a reverse operation on a vector.
7553static bool isReverseMask(ArrayRef<int> M, EVT VT) {
7554 unsigned NumElts = VT.getVectorNumElements();
7555 // Make sure the mask has the right size.
7556 if (NumElts != M.size())
7557 return false;
7558
7559 // Look for <15, ..., 3, -1, 1, 0>.
7560 for (unsigned i = 0; i != NumElts; ++i)
7561 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7562 return false;
7563
7564 return true;
7565}
7566
7567static bool isTruncMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7568 unsigned NumElts = VT.getVectorNumElements();
7569 // Make sure the mask has the right size.
7570 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7571 return false;
7572
7573 // Half-width truncation patterns (e.g. v4i32 -> v8i16):
7574 // !Top && SingleSource: <0, 2, 4, 6, 0, 2, 4, 6>
7575 // !Top && !SingleSource: <0, 2, 4, 6, 8, 10, 12, 14>
7576 // Top && SingleSource: <1, 3, 5, 7, 1, 3, 5, 7>
7577 // Top && !SingleSource: <1, 3, 5, 7, 9, 11, 13, 15>
7578 int Ofs = Top ? 1 : 0;
7579 int Upper = SingleSource ? 0 : NumElts;
7580 for (int i = 0, e = NumElts / 2; i != e; ++i) {
7581 if (M[i] >= 0 && M[i] != (i * 2) + Ofs)
7582 return false;
7583 if (M[i + e] >= 0 && M[i + e] != (i * 2) + Ofs + Upper)
7584 return false;
7585 }
7586 return true;
7587}
7588
7589static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7590 unsigned NumElts = VT.getVectorNumElements();
7591 // Make sure the mask has the right size.
7592 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7593 return false;
7594
7595 // If Top
7596 // Look for <0, N, 2, N+2, 4, N+4, ..>.
7597 // This inserts Input2 into Input1
7598 // else if not Top
7599 // Look for <0, N+1, 2, N+3, 4, N+5, ..>
7600 // This inserts Input1 into Input2
7601 unsigned Offset = Top ? 0 : 1;
7602 unsigned N = SingleSource ? 0 : NumElts;
7603 for (unsigned i = 0; i < NumElts; i += 2) {
7604 if (M[i] >= 0 && M[i] != (int)i)
7605 return false;
7606 if (M[i + 1] >= 0 && M[i + 1] != (int)(N + i + Offset))
7607 return false;
7608 }
7609
7610 return true;
7611}
7612
7613static bool isVMOVNTruncMask(ArrayRef<int> M, EVT ToVT, bool rev) {
7614 unsigned NumElts = ToVT.getVectorNumElements();
7615 if (NumElts != M.size())
7616 return false;
7617
7618 // Test whether the Trunc can be converted to a VMOVN with this shuffle. We
7619 // are looking for patterns of:
7620 // !rev: 0 N/2 1 N/2+1 2 N/2+2 ...
7621 // rev: N/2 0 N/2+1 1 N/2+2 2 ...
7622
7623 unsigned Off0 = rev ? NumElts / 2 : 0;
7624 unsigned Off1 = rev ? 0 : NumElts / 2;
7625 for (unsigned i = 0; i < NumElts; i += 2) {
7626 if (M[i] >= 0 && M[i] != (int)(Off0 + i / 2))
7627 return false;
7628 if (M[i + 1] >= 0 && M[i + 1] != (int)(Off1 + i / 2))
7629 return false;
7630 }
7631
7632 return true;
7633}
7634
7635// Reconstruct an MVE VCVT from a BuildVector of scalar fptrunc, all extracted
7636// from a pair of inputs. For example:
7637// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7638// FP_ROUND(EXTRACT_ELT(Y, 0),
7639// FP_ROUND(EXTRACT_ELT(X, 1),
7640// FP_ROUND(EXTRACT_ELT(Y, 1), ...)
7641 static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG,
7642 const ARMSubtarget *ST) {
7643 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7644 if (!ST->hasMVEFloatOps())
7645 return SDValue();
7646
7647 SDLoc dl(BV);
7648 EVT VT = BV.getValueType();
7649 if (VT != MVT::v8f16)
7650 return SDValue();
7651
7652 // We are looking for a buildvector of fptrunc elements, where all the
7653 // elements are interleavingly extracted from two sources. Check the first two
7654 // items are valid enough and extract some info from them (they are checked
7655 // properly in the loop below).
7656 if (BV.getOperand(0).getOpcode() != ISD::FP_ROUND ||
7657 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7658 BV.getOperand(0).getOperand(0).getConstantOperandVal(1) != 0)
7659 return SDValue();
7660 if (BV.getOperand(1).getOpcode() != ISD::FP_ROUND ||
7661 BV.getOperand(1).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7662 BV.getOperand(1).getOperand(0).getConstantOperandVal(1) != 0)
7663 return SDValue();
7664 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7665 SDValue Op1 = BV.getOperand(1).getOperand(0).getOperand(0);
7666 if (Op0.getValueType() != MVT::v4f32 || Op1.getValueType() != MVT::v4f32)
7667 return SDValue();
7668
7669 // Check all the values in the BuildVector line up with our expectations.
7670 for (unsigned i = 1; i < 4; i++) {
7671 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7672 return Trunc.getOpcode() == ISD::FP_ROUND &&
7673 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7674 Trunc.getOperand(0).getOperand(0) == Op &&
7675 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7676 };
7677 if (!Check(BV.getOperand(i * 2 + 0), Op0, i))
7678 return SDValue();
7679 if (!Check(BV.getOperand(i * 2 + 1), Op1, i))
7680 return SDValue();
7681 }
7682
7683 SDValue N1 = DAG.getNode(ARMISD::VCVTN, dl, VT, DAG.getUNDEF(VT), Op0,
7684 DAG.getConstant(0, dl, MVT::i32));
7685 return DAG.getNode(ARMISD::VCVTN, dl, VT, N1, Op1,
7686 DAG.getConstant(1, dl, MVT::i32));
7687}
7688
7689// Reconstruct an MVE VCVT from a BuildVector of scalar fpext, all extracted
7690// from a single input on alternating lanes. For example:
7691 // BUILDVECTOR(FP_EXTEND(EXTRACT_ELT(X, 0),
7692 // FP_EXTEND(EXTRACT_ELT(X, 2),
7693 // FP_EXTEND(EXTRACT_ELT(X, 4), ...)
7694 static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG,
7695 const ARMSubtarget *ST) {
7696 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7697 if (!ST->hasMVEFloatOps())
7698 return SDValue();
7699
7700 SDLoc dl(BV);
7701 EVT VT = BV.getValueType();
7702 if (VT != MVT::v4f32)
7703 return SDValue();
7704
7705 // We are looking for a buildvector of fpext elements, where all the
7706 // elements are alternating lanes from a single source. For example <0,2,4,6>
7707 // or <1,3,5,7>. Check the first two items are valid enough and extract some
7708 // info from them (they are checked properly in the loop below).
7709 if (BV.getOperand(0).getOpcode() != ISD::FP_EXTEND ||
7710 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
7711 return SDValue();
7712 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7713 int Offset = BV.getOperand(0).getOperand(0).getConstantOperandVal(1);
7714 if (Op0.getValueType() != MVT::v8f16 || (Offset != 0 && Offset != 1))
7715 return SDValue();
7716
7717 // Check all the values in the BuildVector line up with our expectations.
7718 for (unsigned i = 1; i < 4; i++) {
7719 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7720 return Trunc.getOpcode() == ISD::FP_EXTEND &&
7721 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7722 Trunc.getOperand(0).getOperand(0) == Op &&
7723 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7724 };
7725 if (!Check(BV.getOperand(i), Op0, 2 * i + Offset))
7726 return SDValue();
7727 }
7728
7729 return DAG.getNode(ARMISD::VCVTL, dl, VT, Op0,
7730 DAG.getConstant(Offset, dl, MVT::i32));
7731}
7732
7733// If N is an integer constant that can be moved into a register in one
7734// instruction, return an SDValue of such a constant (will become a MOV
7735// instruction). Otherwise return null.
7736 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
7737 const ARMSubtarget *ST, const SDLoc &dl) {
7738 uint64_t Val;
7739 if (!isa<ConstantSDNode>(N))
7740 return SDValue();
7741 Val = N->getAsZExtVal();
7742
7743 if (ST->isThumb1Only()) {
7744 if (Val <= 255 || ~Val <= 255)
7745 return DAG.getConstant(Val, dl, MVT::i32);
7746 } else {
7747 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
7748 return DAG.getConstant(Val, dl, MVT::i32);
7749 }
7750 return SDValue();
7751}
7752
7753 static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
7754 const ARMSubtarget *ST) {
7755 SDLoc dl(Op);
7756 EVT VT = Op.getValueType();
7757
7758 assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
7759
7760 unsigned NumElts = VT.getVectorNumElements();
7761 unsigned BoolMask;
7762 unsigned BitsPerBool;
7763 if (NumElts == 2) {
7764 BitsPerBool = 8;
7765 BoolMask = 0xff;
7766 } else if (NumElts == 4) {
7767 BitsPerBool = 4;
7768 BoolMask = 0xf;
7769 } else if (NumElts == 8) {
7770 BitsPerBool = 2;
7771 BoolMask = 0x3;
7772 } else if (NumElts == 16) {
7773 BitsPerBool = 1;
7774 BoolMask = 0x1;
7775 } else
7776 return SDValue();
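// Each boolean lane is thus replicated across 16/NumElts bits of a 16-bit
// predicate value; e.g. for v4i1, lane i occupies the 4 bits starting at 4*i.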
7777
7778 // If this is a single value copied into all lanes (a splat), we can just sign
7779 // extend that single value
7780 SDValue FirstOp = Op.getOperand(0);
7781 if (!isa<ConstantSDNode>(FirstOp) &&
7782 llvm::all_of(llvm::drop_begin(Op->ops()), [&FirstOp](const SDUse &U) {
7783 return U.get().isUndef() || U.get() == FirstOp;
7784 })) {
7785 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,
7786 DAG.getValueType(MVT::i1));
7787 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
7788 }
7789
7790 // First create base with bits set where known
7791 unsigned Bits32 = 0;
7792 for (unsigned i = 0; i < NumElts; ++i) {
7793 SDValue V = Op.getOperand(i);
7794 if (!isa<ConstantSDNode>(V) && !V.isUndef())
7795 continue;
7796 bool BitSet = V.isUndef() ? false : V->getAsZExtVal();
7797 if (BitSet)
7798 Bits32 |= BoolMask << (i * BitsPerBool);
7799 }
7800
7801 // Add in unknown nodes
7802 SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
7803 DAG.getConstant(Bits32, dl, MVT::i32));
7804 for (unsigned i = 0; i < NumElts; ++i) {
7805 SDValue V = Op.getOperand(i);
7806 if (isa<ConstantSDNode>(V) || V.isUndef())
7807 continue;
7808 Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V,
7809 DAG.getConstant(i, dl, MVT::i32));
7810 }
7811
7812 return Base;
7813}
7814
7815 static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG,
7816 const ARMSubtarget *ST) {
7817 if (!ST->hasMVEIntegerOps())
7818 return SDValue();
7819
7820 // We are looking for a buildvector where each element is Op[0] + i*N
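// e.g. the buildvector <x, x+2, x+4, x+6> becomes a VIDUP of x with step 2;
// only steps of 1, 2, 4 or 8 are supported.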
7821 EVT VT = Op.getValueType();
7822 SDValue Op0 = Op.getOperand(0);
7823 unsigned NumElts = VT.getVectorNumElements();
7824
7825 // Get the increment value from operand 1
7826 SDValue Op1 = Op.getOperand(1);
7827 if (Op1.getOpcode() != ISD::ADD || Op1.getOperand(0) != Op0 ||
7828 !isa<ConstantSDNode>(Op1.getOperand(1)))
7829 return SDValue();
7830 unsigned N = Op1.getConstantOperandVal(1);
7831 if (N != 1 && N != 2 && N != 4 && N != 8)
7832 return SDValue();
7833
7834 // Check that each other operand matches
7835 for (unsigned I = 2; I < NumElts; I++) {
7836 SDValue OpI = Op.getOperand(I);
7837 if (OpI.getOpcode() != ISD::ADD || OpI.getOperand(0) != Op0 ||
7838 !isa<ConstantSDNode>(OpI.getOperand(1)) ||
7839 OpI.getConstantOperandVal(1) != I * N)
7840 return SDValue();
7841 }
7842
7843 SDLoc DL(Op);
7844 return DAG.getNode(ARMISD::VIDUP, DL, DAG.getVTList(VT, MVT::i32), Op0,
7845 DAG.getConstant(N, DL, MVT::i32));
7846}
7847
7848 // Returns true if the operation N can be treated as a qr instruction variant
7849 // at operand Op.
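// For the non-commutative operations (sub/qsub/hsub and their predicated
// forms) only the right-hand data operand can take the scalar ('qr') form, so
// we check that Op is that operand.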
7850static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) {
7851 switch (N->getOpcode()) {
7852 case ISD::ADD:
7853 case ISD::MUL:
7854 case ISD::SADDSAT:
7855 case ISD::UADDSAT:
7856 return true;
7857 case ISD::SUB:
7858 case ISD::SSUBSAT:
7859 case ISD::USUBSAT:
7860 return N->getOperand(1).getNode() == Op;
7861 case ISD::INTRINSIC_WO_CHAIN:
7862 switch (N->getConstantOperandVal(0)) {
7863 case Intrinsic::arm_mve_add_predicated:
7864 case Intrinsic::arm_mve_mul_predicated:
7865 case Intrinsic::arm_mve_qadd_predicated:
7866 case Intrinsic::arm_mve_vhadd:
7867 case Intrinsic::arm_mve_hadd_predicated:
7868 case Intrinsic::arm_mve_vqdmulh:
7869 case Intrinsic::arm_mve_qdmulh_predicated:
7870 case Intrinsic::arm_mve_vqrdmulh:
7871 case Intrinsic::arm_mve_qrdmulh_predicated:
7872 case Intrinsic::arm_mve_vqdmull:
7873 case Intrinsic::arm_mve_vqdmull_predicated:
7874 return true;
7875 case Intrinsic::arm_mve_sub_predicated:
7876 case Intrinsic::arm_mve_qsub_predicated:
7877 case Intrinsic::arm_mve_vhsub:
7878 case Intrinsic::arm_mve_hsub_predicated:
7879 return N->getOperand(2).getNode() == Op;
7880 default:
7881 return false;
7882 }
7883 default:
7884 return false;
7885 }
7886}
7887
7888// If this is a case we can't handle, return null and let the default
7889// expansion code take care of it.
7890SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
7891 const ARMSubtarget *ST) const {
7892 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
7893 SDLoc dl(Op);
7894 EVT VT = Op.getValueType();
7895
7896 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
7897 return LowerBUILD_VECTOR_i1(Op, DAG, ST);
7898
7899 if (SDValue R = LowerBUILD_VECTORToVIDUP(Op, DAG, ST))
7900 return R;
7901
7902 APInt SplatBits, SplatUndef;
7903 unsigned SplatBitSize;
7904 bool HasAnyUndefs;
7905 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7906 if (SplatUndef.isAllOnes())
7907 return DAG.getUNDEF(VT);
7908
7909 // If all the users of this constant splat are qr instruction variants,
7910 // generate a vdup of the constant.
7911 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
7912 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
7913 all_of(BVN->uses(),
7914 [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
7915 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7916 : SplatBitSize == 16 ? MVT::v8i16
7917 : MVT::v16i8;
7918 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7919 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7920 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7921 }
7922
7923 if ((ST->hasNEON() && SplatBitSize <= 64) ||
7924 (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7925 // Check if an immediate VMOV works.
7926 EVT VmovVT;
7927 SDValue Val =
7928 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
7929 SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
7930
7931 if (Val.getNode()) {
7932 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
7933 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7934 }
7935
7936 // Try an immediate VMVN.
7937 uint64_t NegatedImm = (~SplatBits).getZExtValue();
7938 Val = isVMOVModifiedImm(
7939 NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
7940 VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
7941 if (Val.getNode()) {
7942 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
7943 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7944 }
7945
7946 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
7947 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
7948 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
7949 if (ImmVal != -1) {
7950 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
7951 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
7952 }
7953 }
7954
7955 // If we are under MVE, generate a VDUP(constant), bitcast to the original
7956 // type.
7957 if (ST->hasMVEIntegerOps() &&
7958 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {
7959 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7960 : SplatBitSize == 16 ? MVT::v8i16
7961 : MVT::v16i8;
7962 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7963 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7964 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7965 }
7966 }
7967 }
7968
7969 // Scan through the operands to see if only one value is used.
7970 //
7971 // As an optimisation, even if more than one value is used it may be more
7972 // profitable to splat with one value and then change some lanes.
7973 //
7974 // Heuristically we decide to do this if the vector has a "dominant" value,
7975 // defined as splatted to more than half of the lanes.
7976 unsigned NumElts = VT.getVectorNumElements();
7977 bool isOnlyLowElement = true;
7978 bool usesOnlyOneValue = true;
7979 bool hasDominantValue = false;
7980 bool isConstant = true;
7981
7982 // Map of the number of times a particular SDValue appears in the
7983 // element list.
7984 DenseMap<SDValue, unsigned> ValueCounts;
7985 SDValue Value;
7986 for (unsigned i = 0; i < NumElts; ++i) {
7987 SDValue V = Op.getOperand(i);
7988 if (V.isUndef())
7989 continue;
7990 if (i > 0)
7991 isOnlyLowElement = false;
7992 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
7993 isConstant = false;
7994
7995 ValueCounts.insert(std::make_pair(V, 0));
7996 unsigned &Count = ValueCounts[V];
7997
7998 // Is this value dominant? (takes up more than half of the lanes)
7999 if (++Count > (NumElts / 2)) {
8000 hasDominantValue = true;
8001 Value = V;
8002 }
8003 }
8004 if (ValueCounts.size() != 1)
8005 usesOnlyOneValue = false;
8006 if (!Value.getNode() && !ValueCounts.empty())
8007 Value = ValueCounts.begin()->first;
8008
8009 if (ValueCounts.empty())
8010 return DAG.getUNDEF(VT);
8011
8012 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
8013 // Keep going if we are hitting this case.
8014 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
8015 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
8016
8017 unsigned EltSize = VT.getScalarSizeInBits();
8018
8019 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
8020 // i32 and try again.
8021 if (hasDominantValue && EltSize <= 32) {
8022 if (!isConstant) {
8023 SDValue N;
8024
8025 // If we are VDUPing a value that comes directly from a vector, that will
8026 // cause an unnecessary move to and from a GPR, where instead we could
8027 // just use VDUPLANE. We can only do this if the lane being extracted
8028 // is at a constant index, as the VDUP from lane instructions only have
8029 // constant-index forms.
8030 ConstantSDNode *constIndex;
8031 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8032 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
8033 // We need to create a new undef vector to use for the VDUPLANE if the
8034 // size of the vector from which we get the value is different than the
8035 // size of the vector that we need to create. We will insert the element
8036 // such that the register coalescer will remove unnecessary copies.
8037 if (VT != Value->getOperand(0).getValueType()) {
8038 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
8039 VT.getVectorNumElements();
8040 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8041 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
8042 Value, DAG.getConstant(index, dl, MVT::i32)),
8043 DAG.getConstant(index, dl, MVT::i32));
8044 } else
8045 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8046 Value->getOperand(0), Value->getOperand(1));
8047 } else
8048 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
8049
8050 if (!usesOnlyOneValue) {
8051 // The dominant value was splatted as 'N', but we now have to insert
8052 // all differing elements.
8053 for (unsigned I = 0; I < NumElts; ++I) {
8054 if (Op.getOperand(I) == Value)
8055 continue;
8056 SmallVector<SDValue, 3> Ops;
8057 Ops.push_back(N);
8058 Ops.push_back(Op.getOperand(I));
8059 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
8060 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
8061 }
8062 }
8063 return N;
8064 }
8065 if (VT.getVectorElementType().isFloatingPoint()) {
8066 SmallVector<SDValue, 8> Ops;
8067 MVT FVT = VT.getVectorElementType().getSimpleVT();
8068 assert(FVT == MVT::f32 || FVT == MVT::f16);
8069 MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
8070 for (unsigned i = 0; i < NumElts; ++i)
8071 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
8072 Op.getOperand(i)));
8073 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
8074 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
8075 Val = LowerBUILD_VECTOR(Val, DAG, ST);
8076 if (Val.getNode())
8077 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8078 }
8079 if (usesOnlyOneValue) {
8080 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
8081 if (isConstant && Val.getNode())
8082 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
8083 }
8084 }
8085
8086 // If all elements are constants and the case above didn't get hit, fall back
8087 // to the default expansion, which will generate a load from the constant
8088 // pool.
8089 if (isConstant)
8090 return SDValue();
8091
8092 // Reconstruct the BUILDVECTOR to one of the legal shuffles (such as vext and
8093 // vmovn). Empirical tests suggest this is rarely worth it for vectors of
8094 // length <= 2.
8095 if (NumElts >= 4)
8096 if (SDValue shuffle = ReconstructShuffle(Op, DAG))
8097 return shuffle;
8098
8099 // Attempt to turn a buildvector of scalar fptrunc's or fpext's back into
8100 // VCVT's
8101 if (SDValue VCVT = LowerBuildVectorOfFPTrunc(Op, DAG, Subtarget))
8102 return VCVT;
8103 if (SDValue VCVT = LowerBuildVectorOfFPExt(Op, DAG, Subtarget))
8104 return VCVT;
8105
8106 if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
8107 // If we haven't found an efficient lowering, try splitting a 128-bit vector
8108 // into two 64-bit vectors; we might discover a better way to lower it.
8109 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
8110 EVT ExtVT = VT.getVectorElementType();
8111 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
8112 SDValue Lower = DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[0], NumElts / 2));
8113 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
8114 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
8115 SDValue Upper =
8116 DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[NumElts / 2], NumElts / 2));
8117 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
8118 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
8119 if (Lower && Upper)
8120 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
8121 }
8122
8123 // Vectors with 32- or 64-bit elements can be built by directly assigning
8124 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
8125 // will be legalized.
8126 if (EltSize >= 32) {
8127 // Do the expansion with floating-point types, since that is what the VFP
8128 // registers are defined to use, and since i64 is not legal.
8129 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8130 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8131 SmallVector<SDValue, 8> Ops;
8132 for (unsigned i = 0; i < NumElts; ++i)
8133 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
8134 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8135 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8136 }
8137
8138 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
8139 // know the default expansion would otherwise fall back on something even
8140 // worse. For a vector with one or two non-undef values, that would be
8141 // scalar_to_vector for the elements followed by a shuffle (provided the
8142 // shuffle is valid for the target); for everything else it would be
8143 // materialization element by element on the stack followed by a load.
8144 if (!isConstant && !usesOnlyOneValue) {
8145 SDValue Vec = DAG.getUNDEF(VT);
8146 for (unsigned i = 0 ; i < NumElts; ++i) {
8147 SDValue V = Op.getOperand(i);
8148 if (V.isUndef())
8149 continue;
8150 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
8151 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
8152 }
8153 return Vec;
8154 }
8155
8156 return SDValue();
8157}
8158
8159// Gather data to see if the operation can be modelled as a
8160// shuffle in combination with VEXTs.
8161SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
8162 SelectionDAG &DAG) const {
8163 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
8164 SDLoc dl(Op);
8165 EVT VT = Op.getValueType();
8166 unsigned NumElts = VT.getVectorNumElements();
8167
8168 struct ShuffleSourceInfo {
8169 SDValue Vec;
8170 unsigned MinElt = std::numeric_limits<unsigned>::max();
8171 unsigned MaxElt = 0;
8172
8173 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
8174 // be compatible with the shuffle we intend to construct. As a result
8175 // ShuffleVec will be some sliding window into the original Vec.
8176 SDValue ShuffleVec;
8177
8178 // Code should guarantee that element i in Vec starts at element "WindowBase
8179 // + i * WindowScale in ShuffleVec".
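// For example, extracting the high half of a double-width source sets
// WindowBase to -NumSrcElts, and bitcasting a 32-bit-element source to a
// 16-bit-element shuffle type sets WindowScale to 2.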
8180 int WindowBase = 0;
8181 int WindowScale = 1;
8182
8183 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
8184
8185 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
8186 };
8187
8188 // First gather all vectors used as an immediate source for this BUILD_VECTOR
8189 // node.
8190 SmallVector<ShuffleSourceInfo, 2> Sources;
8191 for (unsigned i = 0; i < NumElts; ++i) {
8192 SDValue V = Op.getOperand(i);
8193 if (V.isUndef())
8194 continue;
8195 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
8196 // A shuffle can only come from building a vector from various
8197 // elements of other vectors.
8198 return SDValue();
8199 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
8200 // Furthermore, shuffles require a constant mask, whereas extractelts
8201 // accept variable indices.
8202 return SDValue();
8203 }
8204
8205 // Add this element source to the list if it's not already there.
8206 SDValue SourceVec = V.getOperand(0);
8207 auto Source = llvm::find(Sources, SourceVec);
8208 if (Source == Sources.end())
8209 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8210
8211 // Update the minimum and maximum lane number seen.
8212 unsigned EltNo = V.getConstantOperandVal(1);
8213 Source->MinElt = std::min(Source->MinElt, EltNo);
8214 Source->MaxElt = std::max(Source->MaxElt, EltNo);
8215 }
8216
8217 // Currently only do something sane when at most two source vectors
8218 // are involved.
8219 if (Sources.size() > 2)
8220 return SDValue();
8221
8222 // Find out the smallest element size among result and two sources, and use
8223 // it as element size to build the shuffle_vector.
8224 EVT SmallestEltTy = VT.getVectorElementType();
8225 for (auto &Source : Sources) {
8226 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
8227 if (SrcEltTy.bitsLT(SmallestEltTy))
8228 SmallestEltTy = SrcEltTy;
8229 }
8230 unsigned ResMultiplier =
8231 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
8232 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
8233 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
8234
8235 // If the source vector is too wide or too narrow, we may nevertheless be able
8236 // to construct a compatible shuffle either by concatenating it with UNDEF or
8237 // extracting a suitable range of elements.
8238 for (auto &Src : Sources) {
8239 EVT SrcVT = Src.ShuffleVec.getValueType();
8240
8241 uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
8242 uint64_t VTSize = VT.getFixedSizeInBits();
8243 if (SrcVTSize == VTSize)
8244 continue;
8245
8246 // This stage of the search produces a source with the same element type as
8247 // the original, but with a total width matching the BUILD_VECTOR output.
8248 EVT EltVT = SrcVT.getVectorElementType();
8249 unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
8250 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
8251
8252 if (SrcVTSize < VTSize) {
8253 if (2 * SrcVTSize != VTSize)
8254 return SDValue();
8255 // We can pad out the smaller vector for free, so if it's part of a
8256 // shuffle...
8257 Src.ShuffleVec =
8258 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
8259 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
8260 continue;
8261 }
8262
8263 if (SrcVTSize != 2 * VTSize)
8264 return SDValue();
8265
8266 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8267 // Span too large for a VEXT to cope
8268 return SDValue();
8269 }
8270
8271 if (Src.MinElt >= NumSrcElts) {
8272 // The extraction can just take the second half
8273 Src.ShuffleVec =
8274 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8275 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8276 Src.WindowBase = -NumSrcElts;
8277 } else if (Src.MaxElt < NumSrcElts) {
8278 // The extraction can just take the first half
8279 Src.ShuffleVec =
8280 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8281 DAG.getConstant(0, dl, MVT::i32));
8282 } else {
8283 // An actual VEXT is needed
8284 SDValue VEXTSrc1 =
8285 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8286 DAG.getConstant(0, dl, MVT::i32));
8287 SDValue VEXTSrc2 =
8288 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8289 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8290
8291 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
8292 VEXTSrc2,
8293 DAG.getConstant(Src.MinElt, dl, MVT::i32));
8294 Src.WindowBase = -Src.MinElt;
8295 }
8296 }
8297
8298 // Another possible incompatibility occurs from the vector element types. We
8299 // can fix this by bitcasting the source vectors to the same type we intend
8300 // for the shuffle.
8301 for (auto &Src : Sources) {
8302 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8303 if (SrcEltTy == SmallestEltTy)
8304 continue;
8305 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8306 Src.ShuffleVec = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);
8307 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
8308 Src.WindowBase *= Src.WindowScale;
8309 }
8310
8311 // Final check before we try to actually produce a shuffle.
8312 LLVM_DEBUG(for (auto Src
8313 : Sources)
8314 assert(Src.ShuffleVec.getValueType() == ShuffleVT););
8315
8316 // The stars all align; our next step is to produce the mask for the shuffle.
8317 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8318 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8319 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8320 SDValue Entry = Op.getOperand(i);
8321 if (Entry.isUndef())
8322 continue;
8323
8324 auto Src = llvm::find(Sources, Entry.getOperand(0));
8325 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8326
8327 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8328 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8329 // segment.
8330 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8331 int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8332 VT.getScalarSizeInBits());
8333 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8334
8335 // This source is expected to fill ResMultiplier lanes of the final shuffle,
8336 // starting at the appropriate offset.
8337 int *LaneMask = &Mask[i * ResMultiplier];
8338
8339 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8340 ExtractBase += NumElts * (Src - Sources.begin());
8341 for (int j = 0; j < LanesDefined; ++j)
8342 LaneMask[j] = ExtractBase + j;
8343 }
8344
8345
8346 // We can't handle more than two sources. This should have already
8347 // been checked before this point.
8348 assert(Sources.size() <= 2 && "Too many sources!");
8349
8350 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8351 for (unsigned i = 0; i < Sources.size(); ++i)
8352 ShuffleOps[i] = Sources[i].ShuffleVec;
8353
8354 SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8355 ShuffleOps[1], Mask, DAG);
8356 if (!Shuffle)
8357 return SDValue();
8358 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);
8359}
8360
8361 enum ShuffleOpCodes {
8362 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8363 OP_VREV,
8364 OP_VDUP0,
8365 OP_VDUP1,
8366 OP_VDUP2,
8367 OP_VDUP3,
8368 OP_VEXT1,
8369 OP_VEXT2,
8370 OP_VEXT3,
8371 OP_VUZPL, // VUZP, left result
8372 OP_VUZPR, // VUZP, right result
8373 OP_VZIPL, // VZIP, left result
8374 OP_VZIPR, // VZIP, right result
8375 OP_VTRNL, // VTRN, left result
8376 OP_VTRNR // VTRN, right result
8377 };
8378
8379static bool isLegalMVEShuffleOp(unsigned PFEntry) {
8380 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8381 switch (OpNum) {
8382 case OP_COPY:
8383 case OP_VREV:
8384 case OP_VDUP0:
8385 case OP_VDUP1:
8386 case OP_VDUP2:
8387 case OP_VDUP3:
8388 return true;
8389 }
8390 return false;
8391}
8392
8393/// isShuffleMaskLegal - Targets can use this to indicate that they only
8394/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
8395/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
8396/// are assumed to be legal.
8397 bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
8398 if (VT.getVectorNumElements() == 4 &&
8399 (VT.is128BitVector() || VT.is64BitVector())) {
8400 unsigned PFIndexes[4];
8401 for (unsigned i = 0; i != 4; ++i) {
8402 if (M[i] < 0)
8403 PFIndexes[i] = 8;
8404 else
8405 PFIndexes[i] = M[i];
8406 }
8407
8408 // Compute the index in the perfect shuffle table.
8409 unsigned PFTableIndex =
8410 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8411 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8412 unsigned Cost = (PFEntry >> 30);
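// Each 32-bit PFEntry packs: bits [31:30] the cost, bits [29:26] the OP_*
// opcode, bits [25:13] the table index of the left sub-shuffle and bits
// [12:0] that of the right one, matching the shifts and masks used in
// isLegalMVEShuffleOp here and in GeneratePerfectShuffle below.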
8413
8414 if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
8415 return true;
8416 }
8417
8418 bool ReverseVEXT, isV_UNDEF;
8419 unsigned Imm, WhichResult;
8420
8421 unsigned EltSize = VT.getScalarSizeInBits();
8422 if (EltSize >= 32 ||
8424 ShuffleVectorInst::isIdentityMask(M, M.size()) ||
8425 isVREVMask(M, VT, 64) ||
8426 isVREVMask(M, VT, 32) ||
8427 isVREVMask(M, VT, 16))
8428 return true;
8429 else if (Subtarget->hasNEON() &&
8430 (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
8431 isVTBLMask(M, VT) ||
8432 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
8433 return true;
8434 else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8435 isReverseMask(M, VT))
8436 return true;
8437 else if (Subtarget->hasMVEIntegerOps() &&
8438 (isVMOVNMask(M, VT, true, false) ||
8439 isVMOVNMask(M, VT, false, false) || isVMOVNMask(M, VT, true, true)))
8440 return true;
8441 else if (Subtarget->hasMVEIntegerOps() &&
8442 (isTruncMask(M, VT, false, false) ||
8443 isTruncMask(M, VT, false, true) ||
8444 isTruncMask(M, VT, true, false) || isTruncMask(M, VT, true, true)))
8445 return true;
8446 else
8447 return false;
8448}
8449
8450/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8451/// the specified operations to build the shuffle.
8452 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8453 SDValue RHS, SelectionDAG &DAG,
8454 const SDLoc &dl) {
8455 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8456 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8457 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8458
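// For OP_COPY the operand indices below are base-9 encodings of identity
// masks: (1*9+2)*9+3 encodes <0,1,2,3> (take LHS unchanged) and
// ((4*9+5)*9+6)*9+7 encodes <4,5,6,7> (take RHS unchanged), mirroring the
// PFIndexes[0]*9*9*9 + ... computation used to build the table indices.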
8459 if (OpNum == OP_COPY) {
8460 if (LHSID == (1*9+2)*9+3) return LHS;
8461 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8462 return RHS;
8463 }
8464
8465 SDValue OpLHS, OpRHS;
8466 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8467 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8468 EVT VT = OpLHS.getValueType();
8469
8470 switch (OpNum) {
8471 default: llvm_unreachable("Unknown shuffle opcode!");
8472 case OP_VREV:
8473 // VREV divides the vector in half and swaps within the half.
8474 if (VT.getScalarSizeInBits() == 32)
8475 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
8476 // vrev <4 x i16> -> VREV32
8477 if (VT.getScalarSizeInBits() == 16)
8478 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
8479 // vrev <4 x i8> -> VREV16
8480 assert(VT.getScalarSizeInBits() == 8);
8481 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
8482 case OP_VDUP0:
8483 case OP_VDUP1:
8484 case OP_VDUP2:
8485 case OP_VDUP3:
8486 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8487 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
8488 case OP_VEXT1:
8489 case OP_VEXT2:
8490 case OP_VEXT3:
8491 return DAG.getNode(ARMISD::VEXT, dl, VT,
8492 OpLHS, OpRHS,
8493 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
8494 case OP_VUZPL:
8495 case OP_VUZPR:
8496 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
8497 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
8498 case OP_VZIPL:
8499 case OP_VZIPR:
8500 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
8501 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
8502 case OP_VTRNL:
8503 case OP_VTRNR:
8504 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
8505 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
8506 }
8507}
8508
8509 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
8510 ArrayRef<int> ShuffleMask,
8511 SelectionDAG &DAG) {
8512 // Check to see if we can use the VTBL instruction.
8513 SDValue V1 = Op.getOperand(0);
8514 SDValue V2 = Op.getOperand(1);
8515 SDLoc DL(Op);
8516
8517 SmallVector<SDValue, 8> VTBLMask;
8518 for (int I : ShuffleMask)
8519 VTBLMask.push_back(DAG.getConstant(I, DL, MVT::i32));
8520
8521 if (V2.getNode()->isUndef())
8522 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
8523 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8524
8525 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
8526 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8527}
8528
8529 static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
8530 SDLoc DL(Op);
8531 EVT VT = Op.getValueType();
8532
8533 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8534 "Expect a v8i16, v8f16 or v16i8 type");
8535 SDValue OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, Op.getOperand(0));
8536 // For a v16i8 type: After the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,
8537 // extract the first 8 bytes into the top double word and the last 8 bytes
8538 // into the bottom double word, through a new vector shuffle that will be
8539 // turned into a VEXT on Neon, or a couple of VMOVDs on MVE.
8540 std::vector<int> NewMask;
8541 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8542 NewMask.push_back(VT.getVectorNumElements() / 2 + i);
8543 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8544 NewMask.push_back(i);
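// E.g. for v16i8: VREV64 gives <7,...,0,15,...,8> and NewMask is
// <8,...,15,0,...,7>, so swapping the two double words yields the fully
// reversed vector <15,...,0>.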
8545 return DAG.getVectorShuffle(VT, DL, OpLHS, OpLHS, NewMask);
8546}
8547
8548 static EVT getVectorTyFromPredicateVector(EVT VT) {
8549 switch (VT.getSimpleVT().SimpleTy) {
8550 case MVT::v2i1:
8551 return MVT::v2f64;
8552 case MVT::v4i1:
8553 return MVT::v4i32;
8554 case MVT::v8i1:
8555 return MVT::v8i16;
8556 case MVT::v16i1:
8557 return MVT::v16i8;
8558 default:
8559 llvm_unreachable("Unexpected vector predicate type");
8560 }
8561}
8562
8563 static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT,
8564 SelectionDAG &DAG) {
8565 // Converting from boolean predicates to integers involves creating a vector
8566 // of all ones or all zeroes and selecting the lanes based upon the real
8567 // predicate.
8568 SDValue AllOnes =
8569 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32);
8570 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes);
8571
8572 SDValue AllZeroes =
8573 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32);
8574 AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);
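// 0xe is the one-byte-splat cmode encoding, so AllOnes and AllZeroes are
// v16i8 VMOV modified immediates of all-0xff and all-0x00 bytes respectively.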
8575
8576 // Get full vector type from predicate type
8578
8579 SDValue RecastV1;
8580 // If the real predicate is an v8i1 or v4i1 (not v16i1) then we need to recast
8581 // this to a v16i1. This cannot be done with an ordinary bitcast because the
8582 // sizes are not the same. We have to use a MVE specific PREDICATE_CAST node,
8583 // since we know in hardware the sizes are really the same.
8584 if (VT != MVT::v16i1)
8585 RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
8586 else
8587 RecastV1 = Pred;
8588
8589 // Select either all ones or zeroes depending upon the real predicate bits.
8590 SDValue PredAsVector =
8591 DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes);
8592
8593 // Recast our new predicate-as-integer v16i8 vector into something
8594 // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate.
8595 return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector);
8596}
8597
8598 static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
8599 const ARMSubtarget *ST) {
8600 EVT VT = Op.getValueType();
8601 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8602 ArrayRef<int> ShuffleMask = SVN->getMask();
8603
8604 assert(ST->hasMVEIntegerOps() &&
8605 "No support for vector shuffle of boolean predicates");
8606
8607 SDValue V1 = Op.getOperand(0);
8608 SDValue V2 = Op.getOperand(1);
8609 SDLoc dl(Op);
8610 if (isReverseMask(ShuffleMask, VT)) {
8611 SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1);
8612 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast);
8613 SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit,
8614 DAG.getConstant(16, dl, MVT::i32));
8615 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
8616 }
8617
8618 // Until we can come up with optimised cases for every single vector
8619 // shuffle in existence we have chosen the least painful strategy. This is
8620 // to essentially promote the boolean predicate to an 8-bit integer, where
8621 // each predicate represents a byte. Then we fall back on a normal integer
8622 // vector shuffle and convert the result back into a predicate vector. In
8623 // many cases the generated code might be even better than scalar code
8624 // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
8625 // fields in a register into 8 other arbitrary 2-bit fields!
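// E.g. a v8i1 shuffle becomes: promote both predicates to v8i16 vectors of
// 0x0000/0xffff lanes, shuffle those as ordinary integer vectors, then let
// the VCMPZ (NE) below turn the shuffled lanes back into a v8i1 predicate.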
8626 SDValue PredAsVector1 = PromoteMVEPredVector(dl, V1, VT, DAG);
8627 EVT NewVT = PredAsVector1.getValueType();
8628 SDValue PredAsVector2 = V2.isUndef() ? DAG.getUNDEF(NewVT)
8629 : PromoteMVEPredVector(dl, V2, VT, DAG);
8630 assert(PredAsVector2.getValueType() == NewVT &&
8631 "Expected identical vector type in expanded i1 shuffle!");
8632
8633 // Do the shuffle!
8634 SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector1,
8635 PredAsVector2, ShuffleMask);
8636
8637 // Now return the result of comparing the shuffled vector with zero,
8638 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1
8639 // we convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
8640 if (VT == MVT::v2i1) {
8641 SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);
8642 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
8643 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8644 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
8645 }
8646 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
8647 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8648}
8649
8650 static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
8651 ArrayRef<int> ShuffleMask,
8652 SelectionDAG &DAG) {
8653 // Attempt to lower the vector shuffle using as many whole register movs as
8654 // possible. This is useful for types smaller than 32 bits, which would
8655 // otherwise often become a series of GPR movs.
8656 SDLoc dl(Op);
8657 EVT VT = Op.getValueType();
8658 if (VT.getScalarSizeInBits() >= 32)
8659 return SDValue();
8660
8661 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8662 "Unexpected vector type");
8663 int NumElts = VT.getVectorNumElements();
8664 int QuarterSize = NumElts / 4;
8665 // The four final parts of the vector, as i32's
8666 SDValue Parts[4];
8667
8668 // Look for full lane vmovs like <0,1,2,3> or <u,5,6,7> etc, (but not
8669 // <u,u,u,u>), returning the vmov lane index
8670 auto getMovIdx = [](ArrayRef<int> ShuffleMask, int Start, int Length) {
8671 // Detect which mov lane this would be from the first non-undef element.
8672 int MovIdx = -1;
8673 for (int i = 0; i < Length; i++) {
8674 if (ShuffleMask[Start + i] >= 0) {
8675 if (ShuffleMask[Start + i] % Length != i)
8676 return -1;
8677 MovIdx = ShuffleMask[Start + i] / Length;
8678 break;
8679 }
8680 }
8681 // If all items are undef, leave this for other combines
8682 if (MovIdx == -1)
8683 return -1;
8684 // Check the remaining values are the correct part of the same mov
8685 for (int i = 1; i < Length; i++) {
8686 if (ShuffleMask[Start + i] >= 0 &&
8687 (ShuffleMask[Start + i] / Length != MovIdx ||
8688 ShuffleMask[Start + i] % Length != i))
8689 return -1;
8690 }
8691 return MovIdx;
8692 };
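// E.g. for v8i16 each Part covers QuarterSize = 2 elements: the fragment
// <2,3> is 32-bit lane 1 of the first input (MovIdx 1), while <8,9> is lane 0
// of the second input (MovIdx 4, adjusted by the Elt -= 4 below).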
8693
8694 for (int Part = 0; Part < 4; ++Part) {
8695 // Does this part look like a mov
8696 int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
8697 if (Elt != -1) {
8698 SDValue Input = Op->getOperand(0);
8699 if (Elt >= 4) {
8700 Input = Op->getOperand(1);
8701 Elt -= 4;
8702 }
8703 SDValue BitCast = DAG.getBitcast(MVT::v4f32, Input);
8704 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, BitCast,
8705 DAG.getConstant(Elt, dl, MVT::i32));
8706 }
8707 }
8708
8709 // Nothing interesting found, just return
8710 if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
8711 return SDValue();
8712
8713 // The other parts need to be built with the old shuffle vector, cast to a
8714 // v4i32 and extract_vector_elts
8715 if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
8716 SmallVector<int, 16> NewShuffleMask;
8717 for (int Part = 0; Part < 4; ++Part)
8718 for (int i = 0; i < QuarterSize; i++)
8719 NewShuffleMask.push_back(
8720 Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8721 SDValue NewShuffle = DAG.getVectorShuffle(
8722 VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8723 SDValue BitCast = DAG.getBitcast(MVT::v4f32, NewShuffle);
8724
8725 for (int Part = 0; Part < 4; ++Part)
8726 if (!Parts[Part])
8727 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32,
8728 BitCast, DAG.getConstant(Part, dl, MVT::i32));
8729 }
8730 // Build a vector out of the various parts and bitcast it back to the original
8731 // type.
8732 SDValue NewVec = DAG.getNode(ARMISD::BUILD_VECTOR, dl, MVT::v4f32, Parts);
8733 return DAG.getBitcast(VT, NewVec);
8734}
8735
8736 static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op,
8737 ArrayRef<int> ShuffleMask,
8738 SelectionDAG &DAG) {
8739 SDValue V1 = Op.getOperand(0);
8740 SDValue V2 = Op.getOperand(1);
8741 EVT VT = Op.getValueType();
8742 unsigned NumElts = VT.getVectorNumElements();
8743
8744 // A One-Off Identity mask is one that is mostly an identity mask from a
8745 // single source but contains a single element out-of-place, either from a
8746 // different vector or from another position in the same vector. As opposed to
8747 // lowering this via an ARMISD::BUILD_VECTOR we can generate an extract/insert
8748 // pair directly.
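// E.g. for v4i32 the mask <0,1,6,3> is an identity of V1 except lane 2, so we
// extract lane 2 of V2 (index 6 modulo 4) and insert it into lane 2 of V1.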
8749 auto isOneOffIdentityMask = [](ArrayRef<int> Mask, EVT VT, int BaseOffset,
8750 int &OffElement) {
8751 OffElement = -1;
8752 int NonUndef = 0;
8753 for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
8754 if (Mask[i] == -1)
8755 continue;
8756 NonUndef++;
8757 if (Mask[i] != i + BaseOffset) {
8758 if (OffElement == -1)
8759 OffElement = i;
8760 else
8761 return false;
8762 }
8763 }
8764 return NonUndef > 2 && OffElement != -1;
8765 };
8766 int OffElement;
8767 SDValue VInput;
8768 if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8769 VInput = V1;
8770 else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8771 VInput = V2;
8772 else
8773 return SDValue();
8774
8775 SDLoc dl(Op);
8776 EVT SVT = VT.getScalarType() == MVT::i8 || VT.getScalarType() == MVT::i16
8777 ? MVT::i32
8778 : VT.getScalarType();
8779 SDValue Elt = DAG.getNode(
8780 ISD::EXTRACT_VECTOR_ELT, dl, SVT,
8781 ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,
8782 DAG.getVectorIdxConstant(ShuffleMask[OffElement] % NumElts, dl));
8783 return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, VInput, Elt,
8784 DAG.getVectorIdxConstant(OffElement % NumElts, dl));
8785}
8786
8787 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
8788 const ARMSubtarget *ST) {
8789 SDValue V1 = Op.getOperand(0);
8790 SDValue V2 = Op.getOperand(1);
8791 SDLoc dl(Op);
8792 EVT VT = Op.getValueType();
8793 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8794 unsigned EltSize = VT.getScalarSizeInBits();
8795
8796 if (ST->hasMVEIntegerOps() && EltSize == 1)
8797 return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST);
8798
8799 // Convert shuffles that are directly supported on NEON to target-specific
8800 // DAG nodes, instead of keeping them as shuffles and matching them again
8801 // during code selection. This is more efficient and avoids the possibility
8802 // of inconsistencies between legalization and selection.
8803 // FIXME: floating-point vectors should be canonicalized to integer vectors
8804 // of the same size so that they get CSEd properly.
8805 ArrayRef<int> ShuffleMask = SVN->getMask();
8806
8807 if (EltSize <= 32) {
8808 if (SVN->isSplat()) {
8809 int Lane = SVN->getSplatIndex();
8810 // If this is undef splat, generate it via "just" vdup, if possible.
8811 if (Lane == -1) Lane = 0;
8812
8813 // Test if V1 is a SCALAR_TO_VECTOR.
8814 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
8815 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8816 }
8817 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
8818 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
8819 // reaches it).
8820 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
8821 !isa<ConstantSDNode>(V1.getOperand(0))) {
8822 bool IsScalarToVector = true;
8823 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
8824 if (!V1.getOperand(i).isUndef()) {
8825 IsScalarToVector = false;
8826 break;
8827 }
8828 if (IsScalarToVector)
8829 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8830 }
8831 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
8832 DAG.getConstant(Lane, dl, MVT::i32));
8833 }
8834
8835 bool ReverseVEXT = false;
8836 unsigned Imm = 0;
8837 if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
8838 if (ReverseVEXT)
8839 std::swap(V1, V2);
8840 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
8841 DAG.getConstant(Imm, dl, MVT::i32));
8842 }
8843
8844 if (isVREVMask(ShuffleMask, VT, 64))
8845 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
8846 if (isVREVMask(ShuffleMask, VT, 32))
8847 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
8848 if (isVREVMask(ShuffleMask, VT, 16))
8849 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
8850
8851 if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
8852 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
8853 DAG.getConstant(Imm, dl, MVT::i32));
8854 }
8855
8856 // Check for Neon shuffles that modify both input vectors in place.
8857 // If both results are used, i.e., if there are two shuffles with the same
8858 // source operands and with masks corresponding to both results of one of
8859 // these operations, DAG memoization will ensure that a single node is
8860 // used for both shuffles.
8861 unsigned WhichResult = 0;
8862 bool isV_UNDEF = false;
8863 if (ST->hasNEON()) {
8864 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8865 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
8866 if (isV_UNDEF)
8867 V2 = V1;
8868 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
8869 .getValue(WhichResult);
8870 }
8871 }
8872 if (ST->hasMVEIntegerOps()) {
8873 if (isVMOVNMask(ShuffleMask, VT, false, false))
8874 return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
8875 DAG.getConstant(0, dl, MVT::i32));
8876 if (isVMOVNMask(ShuffleMask, VT, true, false))
8877 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
8878 DAG.getConstant(1, dl, MVT::i32));
8879 if (isVMOVNMask(ShuffleMask, VT, true, true))
8880 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V1,
8881 DAG.getConstant(1, dl, MVT::i32));
8882 }
8883
8884 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
8885 // shuffles that produce a result larger than their operands with:
8886 // shuffle(concat(v1, undef), concat(v2, undef))
8887 // ->
8888 // shuffle(concat(v1, v2), undef)
8889 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
8890 //
8891 // This is useful in the general case, but there are special cases where
8892 // native shuffles produce larger results: the two-result ops.
8893 //
8894 // Look through the concat when lowering them:
8895 // shuffle(concat(v1, v2), undef)
8896 // ->
8897 // concat(VZIP(v1, v2):0, :1)
8898 //
8899 if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
8900 SDValue SubV1 = V1->getOperand(0);
8901 SDValue SubV2 = V1->getOperand(1);
8902 EVT SubVT = SubV1.getValueType();
8903
8904 // We expect these to have been canonicalized to -1.
8905 assert(llvm::all_of(ShuffleMask, [&](int i) {
8906 return i < (int)VT.getVectorNumElements();
8907 }) && "Unexpected shuffle index into UNDEF operand!");
8908
8909 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8910 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
8911 if (isV_UNDEF)
8912 SubV2 = SubV1;
8913 assert((WhichResult == 0) &&
8914 "In-place shuffle of concat can only have one result!");
8915 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
8916 SubV1, SubV2);
8917 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
8918 Res.getValue(1));
8919 }
8920 }
8921 }
8922
8923 if (ST->hasMVEIntegerOps() && EltSize <= 32) {
8924 if (SDValue V = LowerVECTOR_SHUFFLEUsingOneOff(Op, ShuffleMask, DAG))
8925 return V;
8926
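// A truncating mask selects either the bottom (Top == false) or top
// (Top == true) half of each double-width lane from one or both inputs; when
// Top is set the sources are first shifted right by EltSize, and the result
// is then a single MVETRUNC of the two reinterpreted wide vectors.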
8927 for (bool Top : {false, true}) {
8928 for (bool SingleSource : {false, true}) {
8929 if (isTruncMask(ShuffleMask, VT, Top, SingleSource)) {
8930 MVT FromSVT = MVT::getIntegerVT(EltSize * 2);
8931 MVT FromVT = MVT::getVectorVT(FromSVT, ShuffleMask.size() / 2);
8932 SDValue Lo = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT, V1);
8933 SDValue Hi = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT,
8934 SingleSource ? V1 : V2);
8935 if (Top) {
8936 SDValue Amt = DAG.getConstant(EltSize, dl, FromVT);
8937 Lo = DAG.getNode(ISD::SRL, dl, FromVT, Lo, Amt);
8938 Hi = DAG.getNode(ISD::SRL, dl, FromVT, Hi, Amt);
8939 }
8940 return DAG.getNode(ARMISD::MVETRUNC, dl, VT, Lo, Hi);
8941 }
8942 }
8943 }
8944 }
8945
8946 // If the shuffle is not directly supported and it has 4 elements, use
8947 // the PerfectShuffle-generated table to synthesize it from other shuffles.
8948 unsigned NumElts = VT.getVectorNumElements();
8949 if (NumElts == 4) {
8950 unsigned PFIndexes[4];
8951 for (unsigned i = 0; i != 4; ++i) {
8952 if (ShuffleMask[i] < 0)
8953 PFIndexes[i] = 8;
8954 else
8955 PFIndexes[i] = ShuffleMask[i];
8956 }
8957
8958 // Compute the index in the perfect shuffle table.
8959 unsigned PFTableIndex =
8960 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8961 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8962 unsigned Cost = (PFEntry >> 30);
8963
8964 if (Cost <= 4) {
8965 if (ST->hasNEON())
8966 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8967 else if (isLegalMVEShuffleOp(PFEntry)) {
8968 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8969 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8970 unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
8971 unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
8972 if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
8973 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8974 }
8975 }
8976 }
8977
8978 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
8979 if (EltSize >= 32) {
8980 // Do the expansion with floating-point types, since that is what the VFP
8981 // registers are defined to use, and since i64 is not legal.
8982 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8983 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8984 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
8985 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
8986 SmallVector<SDValue, 8> Ops;
8987 for (unsigned i = 0; i < NumElts; ++i) {
8988 if (ShuffleMask[i] < 0)
8989 Ops.push_back(DAG.getUNDEF(EltVT));
8990 else
8991 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
8992 ShuffleMask[i] < (int)NumElts ? V1 : V2,
8993 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
8994 dl, MVT::i32)));
8995 }
8996 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8997 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8998 }
8999
9000 if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
9001 isReverseMask(ShuffleMask, VT))
9002 return LowerReverse_VECTOR_SHUFFLE(Op, DAG);
9003
9004 if (ST->hasNEON() && VT == MVT::v8i8)
9005 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
9006 return NewOp;
9007
9008 if (ST->hasMVEIntegerOps())
9009 if (SDValue NewOp = LowerVECTOR_SHUFFLEUsingMovs(Op, ShuffleMask, DAG))
9010 return NewOp;
9011
9012 return SDValue();
9013}
9014
9015 static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
9016 const ARMSubtarget *ST) {
9017 EVT VecVT = Op.getOperand(0).getValueType();
9018 SDLoc dl(Op);
9019
9020 assert(ST->hasMVEIntegerOps() &&
9021 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9022
9023 SDValue Conv =
9024 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9025 unsigned Lane = Op.getConstantOperandVal(2);
9026 unsigned LaneWidth =
9028 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
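// MVE predicates live in the 16-bit VPR.P0, so e.g. each v4i1 lane occupies
// LaneWidth = 4 bits; inserting into lane 2 clears bits [11:8] via ~Mask and
// BFIs the sign-extended element value into their place.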
9029 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32,
9030 Op.getOperand(1), DAG.getValueType(MVT::i1));
9031 SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext,
9032 DAG.getConstant(~Mask, dl, MVT::i32));
9033 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);
9034}
9035
9036 SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
9037 SelectionDAG &DAG) const {
9038 // INSERT_VECTOR_ELT is legal only for immediate indexes.
9039 SDValue Lane = Op.getOperand(2);
9040 if (!isa<ConstantSDNode>(Lane))
9041 return SDValue();
9042
9043 SDValue Elt = Op.getOperand(1);
9044 EVT EltVT = Elt.getValueType();
9045
9046 if (Subtarget->hasMVEIntegerOps() &&
9047 Op.getValueType().getScalarSizeInBits() == 1)
9048 return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
9049
9050 if (getTypeAction(*DAG.getContext(), EltVT) ==
9051 TargetLowering::TypePromoteFloat) {
9052 // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
9053 // but the type system will try to do that if we don't intervene.
9054 // Reinterpret any such vector-element insertion as one with the
9055 // corresponding integer types.
9056
9057 SDLoc dl(Op);
9058
9059 EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
9060 assert(getTypeAction(*DAG.getContext(), IEltVT) !=
9062
9063 SDValue VecIn = Op.getOperand(0);
9064 EVT VecVT = VecIn.getValueType();
9065 EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
9066 VecVT.getVectorNumElements());
9067
9068 SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
9069 SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
9070 SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
9071 IVecIn, IElt, Lane);
9072 return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
9073 }
9074
9075 return Op;
9076}
9077
9078 static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
9079 const ARMSubtarget *ST) {
9080 EVT VecVT = Op.getOperand(0).getValueType();
9081 SDLoc dl(Op);
9082
9083 assert(ST->hasMVEIntegerOps() &&
9084 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9085
9086 SDValue Conv =
9087 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9088 unsigned Lane = Op.getConstantOperandVal(1);
9089 unsigned LaneWidth =
9091 SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
9092 DAG.getConstant(Lane * LaneWidth, dl, MVT::i32));
9093 return Shift;
9094}
9095
9096 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG,
9097 const ARMSubtarget *ST) {
9098 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
9099 SDValue Lane = Op.getOperand(1);
9100 if (!isa<ConstantSDNode>(Lane))
9101 return SDValue();
9102
9103 SDValue Vec = Op.getOperand(0);
9104 EVT VT = Vec.getValueType();
9105
9106 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9107 return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST);
9108
9109 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
9110 SDLoc dl(Op);
9111 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
9112 }
9113
9114 return Op;
9115}
9116
9117 static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
9118 const ARMSubtarget *ST) {
9119 SDLoc dl(Op);
9120 assert(Op.getValueType().getScalarSizeInBits() == 1 &&
9121 "Unexpected custom CONCAT_VECTORS lowering");
9122 assert(isPowerOf2_32(Op.getNumOperands()) &&
9123 "Unexpected custom CONCAT_VECTORS lowering");
9124 assert(ST->hasMVEIntegerOps() &&
9125 "CONCAT_VECTORS lowering only supported for MVE");
9126
9127 auto ConcatPair = [&](SDValue V1, SDValue V2) {
9128 EVT Op1VT = V1.getValueType();
9129 EVT Op2VT = V2.getValueType();
9130 assert(Op1VT == Op2VT && "Operand types don't match!");
9131 assert((Op1VT == MVT::v2i1 || Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) &&
9132 "Unexpected i1 concat operations!");
9133 EVT VT = Op1VT.getDoubleNumVectorElementsVT(*DAG.getContext());
9134
9135 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9136 SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
9137
9138 // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
9139 // promoted to v8i16, etc.
9140 MVT ElType =
9142 unsigned NumElts = 2 * Op1VT.getVectorNumElements();
9143
9144 EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
9145 if (Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) {
9146 // Use MVETRUNC to truncate the combined NewV1::NewV2 into the smaller
9147 // ConcatVT.
9148 SDValue ConVec =
9149 DAG.getNode(ARMISD::MVETRUNC, dl, ConcatVT, NewV1, NewV2);
9150 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9151 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9152 }
9153
9154 // Extract the vector elements from Op1 and Op2 one by one and truncate them
9155 // to be the right size for the destination. For example, if Op1 is v4i1
9156 // then the promoted vector is v4i32. The result of concatenation gives a
9157 // v8i1, which when promoted is v8i16. That means each i32 element from Op1
9158 // needs truncating to i16 and inserting in the result.
9159 auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
9160 EVT NewVT = NewV.getValueType();
9161 EVT ConcatVT = ConVec.getValueType();
9162 unsigned ExtScale = 1;
9163 if (NewVT == MVT::v2f64) {
9164 NewV = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, NewV);
9165 ExtScale = 2;
9166 }
9167 for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
9168 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
9169 DAG.getIntPtrConstant(i * ExtScale, dl));
9170 ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
9171 DAG.getConstant(j, dl, MVT::i32));
9172 }
9173 return ConVec;
9174 };
9175 unsigned j = 0;
9176 SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
9177 ConVec = ExtractInto(NewV1, ConVec, j);
9178 ConVec = ExtractInto(NewV2, ConVec, j);
9179
9180 // Now return the result of comparing the subvector with zero, which will
9181 // generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9182 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9183 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9184 };
9185
9186 // Concat each pair of subvectors and pack into the lower half of the array.
9187 SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
9188 while (ConcatOps.size() > 1) {
9189 for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
9190 SDValue V1 = ConcatOps[I];
9191 SDValue V2 = ConcatOps[I + 1];
9192 ConcatOps[I / 2] = ConcatPair(V1, V2);
9193 }
9194 ConcatOps.resize(ConcatOps.size() / 2);
9195 }
9196 return ConcatOps[0];
9197}
9198
9199 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
9200 const ARMSubtarget *ST) {
9201 EVT VT = Op->getValueType(0);
9202 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9203 return LowerCONCAT_VECTORS_i1(Op, DAG, ST);
9204
9205 // The only time a CONCAT_VECTORS operation can have legal types is when
9206 // two 64-bit vectors are concatenated to a 128-bit vector.
9207 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
9208 "unexpected CONCAT_VECTORS");
9209 SDLoc dl(Op);
9210 SDValue Val = DAG.getUNDEF(MVT::v2f64);
9211 SDValue Op0 = Op.getOperand(0);
9212 SDValue Op1 = Op.getOperand(1);
9213 if (!Op0.isUndef())
9214 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9215 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
9216 DAG.getIntPtrConstant(0, dl));
9217 if (!Op1.isUndef())
9218 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9219 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
9220 DAG.getIntPtrConstant(1, dl));
9221 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
9222}
9223
9224 static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
9225 const ARMSubtarget *ST) {
9226 SDValue V1 = Op.getOperand(0);
9227 SDValue V2 = Op.getOperand(1);
9228 SDLoc dl(Op);
9229 EVT VT = Op.getValueType();
9230 EVT Op1VT = V1.getValueType();
9231 unsigned NumElts = VT.getVectorNumElements();
9232 unsigned Index = V2->getAsZExtVal();
9233
9234 assert(VT.getScalarSizeInBits() == 1 &&
9235 "Unexpected custom EXTRACT_SUBVECTOR lowering");
9236 assert(ST->hasMVEIntegerOps() &&
9237 "EXTRACT_SUBVECTOR lowering only supported for MVE");
9238
9239 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9240
9241 // We now have Op1 promoted to a vector of integers, where v8i1 gets
9242 // promoted to v8i16, etc.
9243
9245
9246 if (NumElts == 2) {
9247 EVT SubVT = MVT::v4i32;
9248 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9249 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {
9250 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9251 DAG.getIntPtrConstant(i, dl));
9252 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9253 DAG.getConstant(j, dl, MVT::i32));
9254 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9255 DAG.getConstant(j + 1, dl, MVT::i32));
9256 }
9257 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, SubVec,
9258 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9259 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
9260 }
9261
9262 EVT SubVT = MVT::getVectorVT(ElType, NumElts);
9263 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9264 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
9265 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9266 DAG.getIntPtrConstant(i, dl));
9267 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9268 DAG.getConstant(j, dl, MVT::i32));
9269 }
9270
9271 // Now return the result of comparing the subvector with zero,
9272 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9273 return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
9274 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9275}
9276
9277// Turn a truncate into a predicate (an i1 vector) into icmp(and(x, 1), 0).
9278 static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG,
9279 const ARMSubtarget *ST) {
9280 assert(ST->hasMVEIntegerOps() && "Expected MVE!");
9281 EVT VT = N->getValueType(0);
9282 assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
9283 "Expected a vector i1 type!");
9284 SDValue Op = N->getOperand(0);
9285 EVT FromVT = Op.getValueType();
9286 SDLoc DL(N);
9287
9288 SDValue And =
9289 DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));
9290 return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),
9291 DAG.getCondCode(ISD::SETNE));
9292}
9293
9294 static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG,
9295 const ARMSubtarget *Subtarget) {
9296 if (!Subtarget->hasMVEIntegerOps())
9297 return SDValue();
9298
9299 EVT ToVT = N->getValueType(0);
9300 if (ToVT.getScalarType() == MVT::i1)
9301 return LowerTruncatei1(N, DAG, Subtarget);
9302
9303 // MVE does not have a single instruction to perform the truncation of a v4i32
9304 // into the lower half of a v8i16, in the same way that a NEON vmovn would.
9305 // Most of the instructions in MVE follow the 'Beats' system, where moving
9306 // values from different lanes is usually something that the instructions
9307 // avoid.
9308 //
9309 // Instead it has top/bottom instructions such as VMOVLT/B and VMOVNT/B,
9310 // which take the top/bottom half of a larger lane and extend it (or do the
9311 // opposite, truncating into the top/bottom lane from a larger lane). Note
9312 // that because of the way we widen lanes, a v4i16 is really a v4i32 using the
9313 // bottom 16bits from each vector lane. This works really well with T/B
9314 // instructions, but that doesn't extend to v8i32->v8i16 where the lanes need
9315 // to change order.
9316 //
9317 // But truncates and sext/zext are always going to be fairly common from llvm.
9318 // We have several options for how to deal with them:
9319 // - Wherever possible combine them into an instruction that makes them
9320 // "free". This includes loads/stores, which can perform the trunc as part
9321 // of the memory operation. Or certain shuffles that can be turned into
9322 // VMOVN/VMOVL.
9323 // - Lane Interleaving to transform blocks surrounded by ext/trunc. So
9324 // trunc(mul(sext(a), sext(b))) may become
9325 // VMOVNT(VMUL(VMOVLB(a), VMOVLB(b)), VMUL(VMOVLT(a), VMOVLT(b))). (Which in
9326 // this case can use VMULL). This is performed in the
9327 // MVELaneInterleavingPass.
9328 // - Otherwise we have an option. By default we would expand the
9329 // zext/sext/trunc into a series of lane extract/inserts going via GPR
9330 // registers. One for each vector lane in the vector. This can obviously be
9331 // very expensive.
9332 // - The other option is to use the fact that loads/store can extend/truncate
9333 // to turn a trunc into two truncating stack stores and a stack reload. This
9334 // becomes 3 back-to-back memory operations, but at least that is less than
9335 // all the insert/extracts.
9336 //
9337 // In order to do the last, we convert certain trunc's into MVETRUNC, which
9338 // are either optimized where they can be, or eventually lowered into stack
9339 // stores/loads. This prevents us from splitting a v8i16 trunc into two stores
9340 // too early, where other instructions would be better, and stops us from
9341 // having to reconstruct multiple buildvector shuffles into loads/stores.
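// For example, a v8i32 -> v8i16 trunc that reaches this point is split into
// its two v4i32 halves and emitted as a single MVETRUNC(Lo, Hi), which later
// combines are free to optimise or, failing that, expand via the stack.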
9342 if (ToVT != MVT::v8i16 && ToVT != MVT::v16i8)
9343 return SDValue();
9344 EVT FromVT = N->getOperand(0).getValueType();
9345 if (FromVT != MVT::v8i32 && FromVT != MVT::v16i16)
9346 return SDValue();
9347
9348 SDValue Lo, Hi;
9349 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
9350 SDLoc DL(N);
9351 return DAG.getNode(ARMISD::MVETRUNC, DL, ToVT, Lo, Hi);
9352}
9353
9355 const ARMSubtarget *Subtarget) {
9356 if (!Subtarget->hasMVEIntegerOps())
9357 return SDValue();
9358
9359 // See LowerTruncate above for an explanation of MVEEXT/MVETRUNC.
9360
9361 EVT ToVT = N->getValueType(0);
9362 if (ToVT != MVT::v16i32 && ToVT != MVT::v8i32 && ToVT != MVT::v16i16)
9363 return SDValue();
9364 SDValue Op = N->getOperand(0);
9365 EVT FromVT = Op.getValueType();
9366 if (FromVT != MVT::v8i16 && FromVT != MVT::v16i8)
9367 return SDValue();
9368
9369 SDLoc DL(N);
9370 EVT ExtVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext());
9371 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8)
9372 ExtVT = MVT::v8i16;
9373
9374 unsigned Opcode =
9376 SDValue Ext = DAG.getNode(Opcode, DL, DAG.getVTList(ExtVT, ExtVT), Op);
9377 SDValue Ext1 = Ext.getValue(1);
9378
9379 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8) {
9380 Ext = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext);
9381 Ext1 = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext1);
9382 }
9383
9384 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Ext, Ext1);
9385}
9386
9387/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
9388/// element has been zero/sign-extended, depending on the isSigned parameter,
9389/// from an integer type half its size.
9390 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
9391 bool isSigned) {
9392 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
9393 EVT VT = N->getValueType(0);
9394 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
9395 SDNode *BVN = N->getOperand(0).getNode();
9396 if (BVN->getValueType(0) != MVT::v4i32 ||
9397 BVN->getOpcode() != ISD::BUILD_VECTOR)
9398 return false;
9399 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9400 unsigned HiElt = 1 - LoElt;
9401 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
9402 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
9403 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
9404 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
9405 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
9406 return false;
9407 if (isSigned) {
9408 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
9409 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
9410 return true;
9411 } else {
9412 if (Hi0->isZero() && Hi1->isZero())
9413 return true;
9414 }
9415 return false;
9416 }
9417
9418 if (N->getOpcode() != ISD::BUILD_VECTOR)
9419 return false;
9420
9421 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9422 SDNode *Elt = N->getOperand(i).getNode();
9423 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
9424 unsigned EltSize = VT.getScalarSizeInBits();
9425 unsigned HalfSize = EltSize / 2;
9426 if (isSigned) {
9427 if (!isIntN(HalfSize, C->getSExtValue()))
9428 return false;
9429 } else {
9430 if (!isUIntN(HalfSize, C->getZExtValue()))
9431 return false;
9432 }
9433 continue;
9434 }
9435 return false;
9436 }
9437
9438 return true;
9439}
9440
9441/// isSignExtended - Check if a node is a vector value that is sign-extended
9442/// or a constant BUILD_VECTOR with sign-extended elements.
9443 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
9444 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
9445 return true;
9446 if (isExtendedBUILD_VECTOR(N, DAG, true))
9447 return true;
9448 return false;
9449}
9450
9451/// isZeroExtended - Check if a node is a vector value that is zero-extended (or
9452/// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
9453 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
9454 if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
9455 ISD::isZEXTLoad(N))
9456 return true;
9457 if (isExtendedBUILD_VECTOR(N, DAG, false))
9458 return true;
9459 return false;
9460}
9461
9462 static EVT getExtensionTo64Bits(const EVT &OrigVT) {
9463 if (OrigVT.getSizeInBits() >= 64)
9464 return OrigVT;
9465
9466 assert(OrigVT.isSimple() && "Expecting a simple value type");
9467
9468 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
9469 switch (OrigSimpleTy) {
9470 default: llvm_unreachable("Unexpected Vector Type");
9471 case MVT::v2i8:
9472 case MVT::v2i16:
9473 return MVT::v2i32;
9474 case MVT::v4i8:
9475 return MVT::v4i16;
9476 }
9477}
9478
9479/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
9480/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
9481/// We insert the required extension here to get the vector to fill a D register.
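/// For example, a v4i8 operand feeding a v4i32 VMULL result is extended to
/// v4i16 here so that it fills a full 64-bit D register.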
9482 static SDValue AddRequiredExtensionForVMULL(SDValue N,
9483 SelectionDAG &DAG,
9484 const EVT &OrigTy,
9484 const EVT &ExtTy,
9485 unsigned ExtOpcode) {
9486 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
9487 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
9488 // 64-bits we need to insert a new extension so that it will be 64-bits.
9489 assert(ExtTy.is128BitVector() && "Unexpected extension size");
9490 if (OrigTy.getSizeInBits() >= 64)
9491 return N;
9492
9493 // Must extend size to at least 64 bits to be used as an operand for VMULL.
9494 EVT NewVT = getExtensionTo64Bits(OrigTy);
9495
9496 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
9497}
9498
9499/// SkipLoadExtensionForVMULL - return a load of the original vector size that
9500/// does not do any sign/zero extension. If the original vector is less
9501/// than 64 bits, an appropriate extension will be added after the load to
9502/// reach a total size of 64 bits. We have to add the extension separately
9503/// because ARM does not have a sign/zero extending load for vectors.
9504 static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG) {
9505 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
9506
9507 // The load already has the right type.
9508 if (ExtendedTy == LD->getMemoryVT())
9509 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
9510 LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
9511 LD->getMemOperand()->getFlags());
9512
9513 // We need to create a zextload/sextload. We cannot just create a load
9514 // followed by a zext/zext node because LowerMUL is also run during normal
9515 // operation legalization where we can't create illegal types.
9516 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
9517 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
9518 LD->getMemoryVT(), LD->getAlign(),
9519 LD->getMemOperand()->getFlags());
9520}
9521
9522/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
9523/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return
9524/// the unextended value. The unextended vector should be 64 bits so that it can
9525/// be used as an operand to a VMULL instruction. If the original vector size
9526 /// before extension is less than 64 bits we add an extension to resize
9527/// the vector to 64 bits.
9528 static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
9529 if (N->getOpcode() == ISD::SIGN_EXTEND ||
9530 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
9531 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
9532 N->getOperand(0)->getValueType(0),
9533 N->getValueType(0),
9534 N->getOpcode());
9535
9536 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9537 assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
9538 "Expected extending load");
9539
9540 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
9541 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
9542 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9543 SDValue extLoad =
9544 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
9545 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
9546
9547 return newLoad;
9548 }
9549
9550 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
9551 // have been legalized as a BITCAST from v4i32.
9552 if (N->getOpcode() == ISD::BITCAST) {
9553 SDNode *BVN = N->getOperand(0).getNode();
9554 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
9555 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
9556 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9557 return DAG.getBuildVector(
9558 MVT::v2i32, SDLoc(N),
9559 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
9560 }
9561 // Construct a new BUILD_VECTOR with elements truncated to half the size.
9562 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
9563 EVT VT = N->getValueType(0);
9564 unsigned EltSize = VT.getScalarSizeInBits() / 2;
9565 unsigned NumElts = VT.getVectorNumElements();
9566 MVT TruncVT = MVT::getIntegerVT(EltSize);
9567 SmallVector<SDValue, 8> Ops;
9568 SDLoc dl(N);
9569 for (unsigned i = 0; i != NumElts; ++i) {
9570 const APInt &CInt = N->getConstantOperandAPInt(i);
9571 // Element types smaller than 32 bits are not legal, so use i32 elements.
9572 // The values are implicitly truncated so sext vs. zext doesn't matter.
9573 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
9574 }
9575 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
9576}
9577
9578 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
9579 unsigned Opcode = N->getOpcode();
9580 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9581 SDNode *N0 = N->getOperand(0).getNode();
9582 SDNode *N1 = N->getOperand(1).getNode();
9583 return N0->hasOneUse() && N1->hasOneUse() &&
9584 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
9585 }
9586 return false;
9587}
9588
9589 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
9590 unsigned Opcode = N->getOpcode();
9591 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9592 SDNode *N0 = N->getOperand(0).getNode();
9593 SDNode *N1 = N->getOperand(1).getNode();
9594 return N0->hasOneUse() && N1->hasOneUse() &&
9595 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
9596 }
9597 return false;
9598}
9599
9600 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
9601 // Multiplications are only custom-lowered for 128-bit vectors so that
9602 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
9603 EVT VT = Op.getValueType();
9604 assert(VT.is128BitVector() && VT.isInteger() &&
9605 "unexpected type for custom-lowering ISD::MUL");
9606 SDNode *N0 = Op.getOperand(0).getNode();
9607 SDNode *N1 = Op.getOperand(1).getNode();
9608 unsigned NewOpc = 0;
9609 bool isMLA = false;
9610 bool isN0SExt = isSignExtended(N0, DAG);
9611 bool isN1SExt = isSignExtended(N1, DAG);
9612 if (isN0SExt && isN1SExt)
9613 NewOpc = ARMISD::VMULLs;
9614 else {
9615 bool isN0ZExt = isZeroExtended(N0, DAG);
9616 bool isN1ZExt = isZeroExtended(N1, DAG);
9617 if (isN0ZExt && isN1ZExt)
9618 NewOpc = ARMISD::VMULLu;
9619 else if (isN1SExt || isN1ZExt) {
9620 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
9621 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
9622 if (isN1SExt && isAddSubSExt(N0, DAG)) {
9623 NewOpc = ARMISD::VMULLs;
9624 isMLA = true;
9625 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
9626 NewOpc = ARMISD::VMULLu;
9627 isMLA = true;
9628 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
9629 std::swap(N0, N1);
9630 NewOpc = ARMISD::VMULLu;
9631 isMLA = true;
9632 }
9633 }
9634
9635 if (!NewOpc) {
9636 if (VT == MVT::v2i64)
9637 // Fall through to expand this. It is not legal.
9638 return SDValue();
9639 else
9640 // Other vector multiplications are legal.
9641 return Op;
9642 }
9643 }
9644
9645 // Legalize to a VMULL instruction.
9646 SDLoc DL(Op);
9647 SDValue Op0;
9648 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
9649 if (!isMLA) {
9650 Op0 = SkipExtensionForVMULL(N0, DAG);
9651 assert(Op0.getValueType().is64BitVector() &&
9652 Op1.getValueType().is64BitVector() &&
9653 "unexpected types for extended operands to VMULL");
9654 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
9655 }
9656
9657 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
9658 // isel lowering to take advantage of no-stall back to back vmul + vmla.
9659 // vmull q0, d4, d6
9660 // vmlal q0, d5, d6
9661 // is faster than
9662 // vaddl q0, d4, d5
9663 // vmovl q1, d6
9664 // vmul q0, q0, q1
9665 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
9666 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
9667 EVT Op1VT = Op1.getValueType();
9668 return DAG.getNode(N0->getOpcode(), DL, VT,
9669 DAG.getNode(NewOpc, DL, VT,
9670 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
9671 DAG.getNode(NewOpc, DL, VT,
9672 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
9673}
9674
9675 static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
9676 SelectionDAG &DAG) {
9677 // TODO: Should this propagate fast-math-flags?
9678
9679 // Convert to float
9680 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
9681 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
9682 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
9683 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
9684 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
9685 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
9686 // Get reciprocal estimate.
9687 // float4 recip = vrecpeq_f32(yf);
9688 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9689 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9690 Y);
9691 // Because char has a smaller range than uchar, we can actually get away
9692 // without any newton steps. This requires that we use a weird bias
9693 // of 0xb000, however (again, this has been exhaustively tested).
9694 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
9695 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
9696 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
9697 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
9698 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
9699 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
9700 // Convert back to short.
9701 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
9702 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
9703 return X;
9704}
9705
9706 static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
9707 SelectionDAG &DAG) {
9708 // TODO: Should this propagate fast-math-flags?
9709
9710 SDValue N2;
9711 // Convert to float.
9712 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
9713 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
9714 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
9715 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
9716 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9717 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9718
9719 // Use reciprocal estimate and one refinement step.
9720 // float4 recip = vrecpeq_f32(yf);
9721 // recip *= vrecpsq_f32(yf, recip);
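// VRECPS computes 2 - a*b, so recip *= vrecps(yf, recip) is one
// Newton-Raphson step x' = x * (2 - y*x) toward 1/yf.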
9722 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9723 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9724 N1);
9725 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9726 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9727 N1, N2);
9728 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9729 // Because short has a smaller range than ushort, we can actually get away
9730 // with only a single newton step. This requires that we use a weird bias
9731 // of 0x89, however (again, this has been exhaustively tested).
9732 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
9733 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9734 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9735 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
9736 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9737 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9738 // Convert back to integer and return.
9739 // return vmovn_s32(vcvt_s32_f32(result));
9740 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9741 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9742 return N0;
9743}
9744
9745 static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG,
9746 const ARMSubtarget *ST) {
9747 EVT VT = Op.getValueType();
9748 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9749 "unexpected type for custom-lowering ISD::SDIV");
9750
9751 SDLoc dl(Op);
9752 SDValue N0 = Op.getOperand(0);
9753 SDValue N1 = Op.getOperand(1);
9754 SDValue N2, N3;
9755
9756 if (VT == MVT::v8i8) {
9757 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
9758 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
9759
9760 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9761 DAG.getIntPtrConstant(4, dl));
9762 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9763 DAG.getIntPtrConstant(4, dl));
9764 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9765 DAG.getIntPtrConstant(0, dl));
9766 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9767 DAG.getIntPtrConstant(0, dl));
9768
9769 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
9770 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
9771
9772 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9773 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9774
9775 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
9776 return N0;
9777 }
9778 return LowerSDIV_v4i16(N0, N1, dl, DAG);
9779}
9780
9781 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG,
9782 const ARMSubtarget *ST) {
9783 // TODO: Should this propagate fast-math-flags?
9784 EVT VT = Op.getValueType();
9785 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9786 "unexpected type for custom-lowering ISD::UDIV");
9787
9788 SDLoc dl(Op);
9789 SDValue N0 = Op.getOperand(0);
9790 SDValue N1 = Op.getOperand(1);
9791 SDValue N2, N3;
9792
9793 if (VT == MVT::v8i8) {
9794 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
9795 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
9796
9797 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9798 DAG.getIntPtrConstant(4, dl));
9799 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9800 DAG.getIntPtrConstant(4, dl));
9801 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9802 DAG.getIntPtrConstant(0, dl));
9803 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9804 DAG.getIntPtrConstant(0, dl));
9805
9806 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
9807 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
9808
9809 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9810 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9811
9812 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
9813 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
9814 MVT::i32),
9815 N0);
9816 return N0;
9817 }
9818
9819 // v4i16 sdiv ... Convert to float.
9820 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
9821 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
9822 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
9823 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
9824 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9825 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9826
9827 // Use reciprocal estimate and two refinement steps.
9828 // float4 recip = vrecpeq_f32(yf);
9829 // recip *= vrecpsq_f32(yf, recip);
9830 // recip *= vrecpsq_f32(yf, recip);
9831 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9832 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9833 BN1);
9834 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9835 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9836 BN1, N2);
9837 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9838 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9839 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9840 BN1, N2);
9841 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9842 // Simply multiplying by the reciprocal estimate can leave us a few ulps
9843 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
9844 // and that it will never cause us to return an answer too large).
9845 // float4 result = as_float4(as_int4(xf*recip) + 2);
9846 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9847 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9848 N1 = DAG.getConstant(2, dl, MVT::v4i32);
9849 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9850 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9851 // Convert back to integer and return.
9852 // return vmovn_u32(vcvt_s32_f32(result));
9853 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9854 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9855 return N0;
9856}
9857
9859 SDNode *N = Op.getNode();
9860 EVT VT = N->getValueType(0);
9861 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
9862
9863 SDValue Carry = Op.getOperand(2);
9864
9865 SDLoc DL(Op);
9866
9867 SDValue Result;
9868 if (Op.getOpcode() == ISD::UADDO_CARRY) {
9869 // This converts the boolean value carry into the carry flag.
9870 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9871
9872 // Do the addition proper using the carry flag we wanted.
9873 Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
9874 Op.getOperand(1), Carry);
9875
9876 // Now convert the carry flag into a boolean value.
9877 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9878 } else {
9879 // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we
9880 // have to invert the carry first.
9881 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9882 DAG.getConstant(1, DL, MVT::i32), Carry);
9883 // This converts the boolean value carry into the carry flag.
9884 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9885
9886 // Do the subtraction proper using the carry flag we wanted.
9887 Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
9888 Op.getOperand(1), Carry);
9889
9890 // Now convert the carry flag into a boolean value.
9891 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9892 // But the carry returned by ARMISD::SUBE is not a borrow as expected
9893 // by ISD::USUBO_CARRY, so compute 1 - C.
9894 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9895 DAG.getConstant(1, DL, MVT::i32), Carry);
9896 }
9897
9898 // Return both values.
9899 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
9900}
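// A scalar model of the carry/borrow handling above (illustrative sketch only;
// "usubo_carry_model" is a hypothetical helper, not part of this lowering).
// ISD::USUBO_CARRY consumes and produces a borrow bit, while the SBC-style
// ARMISD::SUBE consumes and produces a carry, where carry == 1 - borrow.
static std::pair<uint32_t, uint32_t>
usubo_carry_model(uint32_t A, uint32_t B, uint32_t BorrowIn) {
  uint32_t CarryIn = 1 - BorrowIn;                 // borrow -> carry
  uint64_t Wide = (uint64_t)A - B - (1 - CarryIn); // SUBE computes A - B - !C
  uint32_t CarryOut = ((Wide >> 32) & 1) ? 0 : 1;  // carry set iff no wrap
  return {(uint32_t)Wide, 1 - CarryOut};           // carry -> borrow
}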
9901
9902SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
9903 assert(Subtarget->isTargetDarwin());
9904
9905 // For iOS, we want to call an alternative entry point: __sincos_stret,
9906 // which returns the values via sret.
9907 SDLoc dl(Op);
9908 SDValue Arg = Op.getOperand(0);
9909 EVT ArgVT = Arg.getValueType();
9910 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
9911 auto PtrVT = getPointerTy(DAG.getDataLayout());
9912
9914 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9915
9916 // Pair of floats / doubles used to pass the result.
9917 Type *RetTy = StructType::get(ArgTy, ArgTy);
9918 auto &DL = DAG.getDataLayout();
9919
9921 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
9922 SDValue SRet;
9923 if (ShouldUseSRet) {
9924 // Create stack object for sret.
9925 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
9926 const Align StackAlign = DL.getPrefTypeAlign(RetTy);
9927 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
9928 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
9929
9930 ArgListEntry Entry;
9931 Entry.Node = SRet;
9932 Entry.Ty = PointerType::getUnqual(RetTy->getContext());
9933 Entry.IsSExt = false;
9934 Entry.IsZExt = false;
9935 Entry.IsSRet = true;
9936 Args.push_back(Entry);
9938 }
9939
9940 ArgListEntry Entry;
9941 Entry.Node = Arg;
9942 Entry.Ty = ArgTy;
9943 Entry.IsSExt = false;
9944 Entry.IsZExt = false;
9945 Args.push_back(Entry);
9946
9947 RTLIB::Libcall LC =
9948 (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
9949 const char *LibcallName = getLibcallName(LC);
9951 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
9952
9954 CLI.setDebugLoc(dl)
9955 .setChain(DAG.getEntryNode())
9956 .setCallee(CC, RetTy, Callee, std::move(Args))
9957 .setDiscardResult(ShouldUseSRet);
9958 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
9959
9960 if (!ShouldUseSRet)
9961 return CallResult.first;
9962
9963 SDValue LoadSin =
9964 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
9965
9966 // Address of cos field.
9967 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
9968 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
9969 SDValue LoadCos =
9970 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
9971
9972 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
9973 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
9974 LoadSin.getValue(0), LoadCos.getValue(0));
9975}
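// Shape of the call built above at the C level (illustrative sketch; "SinCosF"
// is a made-up name, while __sincos_stret is the real entry point):
//   struct SinCosF { float Sin; float Cos; };     // RetTy above, for f32
//   void __sincos_stret(SinCosF *sret, float x);  // APCS ABI: result via sret
// With sret, the sin value is then loaded from offset 0 and the cos value from
// offset ArgVT.getStoreSize(), matching the two loads above.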
9976
9977SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
9978 bool Signed,
9979 SDValue &Chain) const {
9980 EVT VT = Op.getValueType();
9981 assert((VT == MVT::i32 || VT == MVT::i64) &&
9982 "unexpected type for custom lowering DIV");
9983 SDLoc dl(Op);
9984
9985 const auto &DL = DAG.getDataLayout();
9986 const auto &TLI = DAG.getTargetLoweringInfo();
9987
9988 const char *Name = nullptr;
9989 if (Signed)
9990 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
9991 else
9992 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
9993
9995
9997
9998 for (auto AI : {1, 0}) {
9999 ArgListEntry Arg;
10000 Arg.Node = Op.getOperand(AI);
10001 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
10002 Args.push_back(Arg);
10003 }
10004
10005 CallLoweringInfo CLI(DAG);
10006 CLI.setDebugLoc(dl)
10007 .setChain(Chain)
10009 ES, std::move(Args));
10010
10011 return LowerCallTo(CLI).first;
10012}
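// Note on the argument order above: the loop pushes operand 1 before operand
// 0, so the divisor becomes the first argument of the runtime helper, i.e.
// (sketch, not emitted code):  quotient = __rt_sdiv(divisor, dividend);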
10013
10014// This is a code-size optimisation: return the original SDIV node to
10015// DAGCombiner when we don't want to expand SDIV into a sequence of
10016// instructions, and an empty SDValue otherwise, which will cause
10017// DAGCombine to expand the SDIV itself.
10018SDValue
10019ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
10020 SelectionDAG &DAG,
10021 SmallVectorImpl<SDNode *> &Created) const {
10022 // TODO: Support SREM
10023 if (N->getOpcode() != ISD::SDIV)
10024 return SDValue();
10025
10026 const auto &ST = DAG.getSubtarget<ARMSubtarget>();
10027 const bool MinSize = ST.hasMinSize();
10028 const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
10029 : ST.hasDivideInARMMode();
10030
10031 // Don't touch vector types; rewriting this may lead to scalarizing
10032 // the int divs.
10033 if (N->getOperand(0).getValueType().isVector())
10034 return SDValue();
10035
10036 // Bail if MinSize is not set, and also for both ARM and Thumb mode we need
10037 // hwdiv support for this to be really profitable.
10038 if (!(MinSize && HasDivide))
10039 return SDValue();
10040
10041 // ARM mode is a bit simpler than Thumb: we can handle large power
10042 // of 2 immediates with 1 mov instruction; no further checks required,
10043 // just return the sdiv node.
10044 if (!ST.isThumb())
10045 return SDValue(N, 0);
10046
10047 // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
10048 // and thus lose the code-size benefit of a MOVS that requires only 2 bytes.
10049 // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
10050 // but as it's doing exactly this, it's not worth the trouble to get TTI.
10051 if (Divisor.sgt(128))
10052 return SDValue();
10053
10054 return SDValue(N, 0);
10055}
10056
10057SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
10058 bool Signed) const {
10059 assert(Op.getValueType() == MVT::i32 &&
10060 "unexpected type for custom lowering DIV");
10061 SDLoc dl(Op);
10062
10063 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
10064 DAG.getEntryNode(), Op.getOperand(1));
10065
10066 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10067}
10068
10069static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
10070 SDLoc DL(N);
10071 SDValue Op = N->getOperand(1);
10072 if (N->getValueType(0) == MVT::i32)
10073 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
10074 SDValue Lo, Hi;
10075 std::tie(Lo, Hi) = DAG.SplitScalar(Op, DL, MVT::i32, MVT::i32);
10076 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
10077 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
10078}
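// For the i64 case above, the two 32-bit halves are ORed together first, so
// the WIN__DBZCHK node traps exactly when the full 64-bit denominator is zero
// (Lo | Hi == 0); a denominator with only the high word set is left alone.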
10079
10080void ARMTargetLowering::ExpandDIV_Windows(
10081 SDValue Op, SelectionDAG &DAG, bool Signed,
10082 SmallVectorImpl<SDValue> &Results) const {
10083 const auto &DL = DAG.getDataLayout();
10084 const auto &TLI = DAG.getTargetLoweringInfo();
10085
10086 assert(Op.getValueType() == MVT::i64 &&
10087 "unexpected type for custom lowering DIV");
10088 SDLoc dl(Op);
10089
10090 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
10091
10092 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10093
10094 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
10095 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
10096 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
10097 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
10098
10099 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lower, Upper));
10100}
10101
10102static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
10103 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
10104 EVT MemVT = LD->getMemoryVT();
10105 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10106 MemVT == MVT::v16i1) &&
10107 "Expected a predicate type!");
10108 assert(MemVT == Op.getValueType());
10109 assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
10110 "Expected a non-extending load");
10111 assert(LD->isUnindexed() && "Expected an unindexed load");
10112
10113 // The basic MVE VLDR on a v2i1/v4i1/v8i1 actually loads the entire 16-bit
10114 // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
10115 // need to make sure that the 8/4/2 bits are actually loaded into the
10116 // correct place, which means loading the value and then shuffling the
10117 // values into the bottom bits of the predicate.
10118 // Equally, a VLDR for a v16i1 will actually load 32 bits (so would be
10119 // incorrect for BE).
10120 // For BE, the rest of LLVM apparently assumes the reverse order to a
10121 // natural VMSR(load), so the value needs to be reversed.
10122
10123 SDLoc dl(Op);
10124 SDValue Load = DAG.getExtLoad(
10125 ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
10127 LD->getMemOperand());
10128 SDValue Val = Load;
10129 if (DAG.getDataLayout().isBigEndian())
10130 Val = DAG.getNode(ISD::SRL, dl, MVT::i32,
10131 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Load),
10132 DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32));
10133 SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val);
10134 if (MemVT != MVT::v16i1)
10135 Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
10136 DAG.getConstant(0, dl, MVT::i32));
10137 return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
10138}
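// A scalar model of the big-endian fix-up above (illustrative sketch only;
// "predicateLoadBEFixup" is a hypothetical helper). For a v8i1 load,
// MemBits == 8, so the loaded word is bit-reversed and shifted right by
// 32 - 8 == 24, leaving the 8 predicate bits in the least significant bits
// before the PREDICATE_CAST.
static uint32_t predicateLoadBEFixup(uint32_t Loaded, unsigned MemBits) {
  return llvm::reverseBits(Loaded) >> (32 - MemBits);
}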
10139
10140void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
10141 SelectionDAG &DAG) const {
10142 LoadSDNode *LD = cast<LoadSDNode>(N);
10143 EVT MemVT = LD->getMemoryVT();
10144 assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
10145
10146 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10147 !Subtarget->isThumb1Only() && LD->isVolatile() &&
10148 LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10149 SDLoc dl(N);
10150 SDValue Result = DAG.getMemIntrinsicNode(
10151 ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
10152 {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
10153 SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);
10154 SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
10155 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
10156 Results.append({Pair, Result.getValue(2)});
10157 }
10158}
10159
10160static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
10161 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10162 EVT MemVT = ST->getMemoryVT();
10163 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10164 MemVT == MVT::v16i1) &&
10165 "Expected a predicate type!");
10166 assert(MemVT == ST->getValue().getValueType());
10167 assert(!ST->isTruncatingStore() && "Expected a non-extending store");
10168 assert(ST->isUnindexed() && "Expected an unindexed store");
10169
10170 // Only store the v2i1 or v4i1 or v8i1 worth of bits, via a buildvector with
10171 // top bits unset and a scalar store.
10172 SDLoc dl(Op);
10173 SDValue Build = ST->getValue();
10174 if (MemVT != MVT::v16i1) {
10175 SmallVector<SDValue, 16> Ops;
10176 for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) {
10177 unsigned Elt = DAG.getDataLayout().isBigEndian()
10178 ? MemVT.getVectorNumElements() - I - 1
10179 : I;
10180 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
10181 DAG.getConstant(Elt, dl, MVT::i32)));
10182 }
10183 for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
10184 Ops.push_back(DAG.getUNDEF(MVT::i32));
10185 Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
10186 }
10187 SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
10188 if (MemVT == MVT::v16i1 && DAG.getDataLayout().isBigEndian())
10189 GRP = DAG.getNode(ISD::SRL, dl, MVT::i32,
10190 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, GRP),
10191 DAG.getConstant(16, dl, MVT::i32));
10192 return DAG.getTruncStore(
10193 ST->getChain(), dl, GRP, ST->getBasePtr(),
10195 ST->getMemOperand());
10196}
10197
10198static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
10199 const ARMSubtarget *Subtarget) {
10200 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10201 EVT MemVT = ST->getMemoryVT();
10202 assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
10203
10204 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10205 !Subtarget->isThumb1Only() && ST->isVolatile() &&
10206 ST->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10207 SDNode *N = Op.getNode();
10208 SDLoc dl(N);
10209
10210 SDValue Lo = DAG.getNode(
10211 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10212 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,
10213 MVT::i32));
10214 SDValue Hi = DAG.getNode(
10215 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10216 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,
10217 MVT::i32));
10218
10219 return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
10220 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
10221 MemVT, ST->getMemOperand());
10222 } else if (Subtarget->hasMVEIntegerOps() &&
10223 ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10224 MemVT == MVT::v16i1))) {
10225 return LowerPredicateStore(Op, DAG);
10226 }
10227
10228 return SDValue();
10229}
10230
10231static bool isZeroVector(SDValue N) {
10232 return (ISD::isBuildVectorAllZeros(N.getNode()) ||
10233 (N->getOpcode() == ARMISD::VMOVIMM &&
10234 isNullConstant(N->getOperand(0))));
10235}
10236
10237static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
10238 MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
10239 MVT VT = Op.getSimpleValueType();
10240 SDValue Mask = N->getMask();
10241 SDValue PassThru = N->getPassThru();
10242 SDLoc dl(Op);
10243
10244 if (isZeroVector(PassThru))
10245 return Op;
10246
10247 // MVE Masked loads use zero as the passthru value. Here we convert undef to
10248 // zero too, and other values are lowered to a select.
10249 SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
10250 DAG.getTargetConstant(0, dl, MVT::i32));
10251 SDValue NewLoad = DAG.getMaskedLoad(
10252 VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
10253 N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
10254 N->getExtensionType(), N->isExpandingLoad());
10255 SDValue Combo = NewLoad;
10256 bool PassThruIsCastZero = (PassThru.getOpcode() == ISD::BITCAST ||
10257 PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
10258 isZeroVector(PassThru->getOperand(0));
10259 if (!PassThru.isUndef() && !PassThruIsCastZero)
10260 Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
10261 return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
10262}
10263
10264static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG,
10265 const ARMSubtarget *ST) {
10266 if (!ST->hasMVEIntegerOps())
10267 return SDValue();
10268
10269 SDLoc dl(Op);
10270 unsigned BaseOpcode = 0;
10271 switch (Op->getOpcode()) {
10272 default: llvm_unreachable("Expected VECREDUCE opcode");
10273 case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
10274 case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
10275 case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
10276 case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
10277 case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
10278 case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
10279 case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;
10280 case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;
10281 }
10282
10283 SDValue Op0 = Op->getOperand(0);
10284 EVT VT = Op0.getValueType();
10285 EVT EltVT = VT.getVectorElementType();
10286 unsigned NumElts = VT.getVectorNumElements();
10287 unsigned NumActiveLanes = NumElts;
10288
10289 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10290 NumActiveLanes == 2) &&
10291 "Only expected a power-of-2 vector size");
10292
10293 // Use Mul(X, Rev(X)) until 4 items remain. Going down to 4 vector elements
10294 // allows us to easily extract vector elements from the lanes.
10295 while (NumActiveLanes > 4) {
10296 unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;
10297 SDValue Rev = DAG.getNode(RevOpcode, dl, VT, Op0);
10298 Op0 = DAG.getNode(BaseOpcode, dl, VT, Op0, Rev);
10299 NumActiveLanes /= 2;
10300 }
10301
10302 SDValue Res;
10303 if (NumActiveLanes == 4) {
10304 // The remaining 4 elements are reduced sequentially with BaseOpcode.
10305 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10306 DAG.getConstant(0 * NumElts / 4, dl, MVT::i32));
10307 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10308 DAG.getConstant(1 * NumElts / 4, dl, MVT::i32));
10309 SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10310 DAG.getConstant(2 * NumElts / 4, dl, MVT::i32));
10311 SDValue Ext3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10312 DAG.getConstant(3 * NumElts / 4, dl, MVT::i32));
10313 SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10314 SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
10315 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
10316 } else {
10317 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10318 DAG.getConstant(0, dl, MVT::i32));
10319 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10320 DAG.getConstant(1, dl, MVT::i32));
10321 Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10322 }
10323
10324 // Result type may be wider than element type.
10325 if (EltVT != Op->getValueType(0))
10326 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);
10327 return Res;
10328}
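// Worked example for a v8i16 reduction (illustrative): with NumElts == 8 the
// loop above runs once using VREV32, which swaps adjacent 16-bit lanes, so
// BaseOpcode combines each lane with its neighbour. The 4 surviving partial
// results then sit in lanes 0, 2, 4 and 6 (i * NumElts / 4) and are combined
// pairwise with BaseOpcode, carrying the node's fast-math flags.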
10329
10330static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG,
10331 const ARMSubtarget *ST) {
10332 if (!ST->hasMVEFloatOps())
10333 return SDValue();
10334 return LowerVecReduce(Op, DAG, ST);
10335}
10336
10337static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG,
10338 const ARMSubtarget *ST) {
10339 if (!ST->hasNEON())
10340 return SDValue();
10341
10342 SDLoc dl(Op);
10343 SDValue Op0 = Op->getOperand(0);
10344 EVT VT = Op0.getValueType();
10345 EVT EltVT = VT.getVectorElementType();
10346
10347 unsigned PairwiseIntrinsic = 0;
10348 switch (Op->getOpcode()) {
10349 default:
10350 llvm_unreachable("Expected VECREDUCE opcode");
10351 case ISD::VECREDUCE_UMIN:
10352 PairwiseIntrinsic = Intrinsic::arm_neon_vpminu;
10353 break;
10354 case ISD::VECREDUCE_UMAX:
10355 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxu;
10356 break;
10357 case ISD::VECREDUCE_SMIN:
10358 PairwiseIntrinsic = Intrinsic::arm_neon_vpmins;
10359 break;
10360 case ISD::VECREDUCE_SMAX:
10361 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxs;
10362 break;
10363 }
10364 SDValue PairwiseOp = DAG.getConstant(PairwiseIntrinsic, dl, MVT::i32);
10365
10366 unsigned NumElts = VT.getVectorNumElements();
10367 unsigned NumActiveLanes = NumElts;
10368
10369 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10370 NumActiveLanes == 2) &&
10371 "Only expected a power-of-2 vector size");
10372
10373 // Split 128-bit vectors, since vpmin/max takes 2 64-bit vectors.
10374 if (VT.is128BitVector()) {
10375 SDValue Lo, Hi;
10376 std::tie(Lo, Hi) = DAG.SplitVector(Op0, dl);
10377 VT = Lo.getValueType();
10378 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Lo, Hi});
10379 NumActiveLanes /= 2;
10380 }
10381
10382 // Use pairwise reductions until one lane remains
10383 while (NumActiveLanes > 1) {
10384 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Op0, Op0});
10385 NumActiveLanes /= 2;
10386 }
10387
10388 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10389 DAG.getConstant(0, dl, MVT::i32));
10390
10391 // Result type may be wider than element type.
10392 if (EltVT != Op.getValueType()) {
10393 unsigned Extend = 0;
10394 switch (Op->getOpcode()) {
10395 default:
10396 llvm_unreachable("Expected VECREDUCE opcode");
10397 case ISD::VECREDUCE_UMIN:
10398 case ISD::VECREDUCE_UMAX:
10399 Extend = ISD::ZERO_EXTEND;
10400 break;
10401 case ISD::VECREDUCE_SMIN:
10402 case ISD::VECREDUCE_SMAX:
10403 Extend = ISD::SIGN_EXTEND;
10404 break;
10405 }
10406 Res = DAG.getNode(Extend, dl, Op.getValueType(), Res);
10407 }
10408 return Res;
10409}
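// Example for a v4i32 VECREDUCE_UMIN (illustrative): the 128-bit input is
// first split into two 64-bit halves and combined with one pairwise vpmin,
// giving a v2i32; a second vpmin of that value with itself reduces it to a
// single lane, which is then extracted from lane 0 and zero-extended if the
// result type is wider than the element type.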
10410
10411static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
10412 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering()))
10413 // Acquire/Release load/store is not legal for targets without a dmb or
10414 // equivalent available.
10415 return SDValue();
10416
10417 // Monotonic load/store is legal for all targets.
10418 return Op;
10419}
10420
10421static void ReplaceREADCYCLECOUNTER(SDNode *N,
10422 SmallVectorImpl<SDValue> &Results,
10423 SelectionDAG &DAG,
10424 const ARMSubtarget *Subtarget) {
10425 SDLoc DL(N);
10426 // Under Power Management extensions, the cycle-count is:
10427 // mrc p15, #0, <Rt>, c9, c13, #0
10428 SDValue Ops[] = { N->getOperand(0), // Chain
10429 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
10430 DAG.getTargetConstant(15, DL, MVT::i32),
10431 DAG.getTargetConstant(0, DL, MVT::i32),
10432 DAG.getTargetConstant(9, DL, MVT::i32),
10433 DAG.getTargetConstant(13, DL, MVT::i32),
10434 DAG.getTargetConstant(0, DL, MVT::i32)
10435 };
10436
10437 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
10438 DAG.getVTList(MVT::i32, MVT::Other), Ops);
10439 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
10440 DAG.getConstant(0, DL, MVT::i32)));
10441 Results.push_back(Cycles32.getValue(1));
10442}
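// The operands above encode "mrc p15, #0, <Rt>, c9, c13, #0", i.e. a read of
// the cycle counter (PMCCNTR); a rough user-level equivalent (illustrative,
// not emitted code):
//   uint32_t cc;
//   __asm__ volatile("mrc p15, #0, %0, c9, c13, #0" : "=r"(cc));
//   uint64_t wide = cc; // the high 32 bits are materialized as constant 0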
10443
10444static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
10445 SDLoc dl(V.getNode());
10446 auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
10447 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10448 if (isBigEndian)
10449 std::swap (VLo, VHi);
10450 SDValue RegClass =
10451 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
10452 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
10453 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
10454 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
10455 return SDValue(
10456 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
10457}
10458
10459static void ReplaceCMP_SWAP_64Results(SDNode *N,
10460 SmallVectorImpl<SDValue> &Results,
10461 SelectionDAG &DAG) {
10462 assert(N->getValueType(0) == MVT::i64 &&
10463 "AtomicCmpSwap on types less than 64 should be legal");
10464 SDValue Ops[] = {N->getOperand(1),
10465 createGPRPairNode(DAG, N->getOperand(2)),
10466 createGPRPairNode(DAG, N->getOperand(3)),
10467 N->getOperand(0)};
10468 SDNode *CmpSwap = DAG.getMachineNode(
10469 ARM::CMP_SWAP_64, SDLoc(N),
10470 DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
10471
10472 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
10473 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
10474
10475 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10476
10477 SDValue Lo =
10478 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
10479 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10480 SDValue Hi =
10481 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
10482 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10483 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i64, Lo, Hi));
10484 Results.push_back(SDValue(CmpSwap, 2));
10485}
10486
10487SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
10488 SDLoc dl(Op);
10489 EVT VT = Op.getValueType();
10490 SDValue Chain = Op.getOperand(0);
10491 SDValue LHS = Op.getOperand(1);
10492 SDValue RHS = Op.getOperand(2);
10493 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
10494 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
10495
10496 // If we don't have instructions of this float type then soften to a libcall
10497 // and use SETCC instead.
10498 if (isUnsupportedFloatingType(LHS.getValueType())) {
10500 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS, Chain, IsSignaling);
10501 if (!RHS.getNode()) {
10502 RHS = DAG.getConstant(0, dl, LHS.getValueType());
10503 CC = ISD::SETNE;
10504 }
10505 SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS,
10506 DAG.getCondCode(CC));
10507 return DAG.getMergeValues({Result, Chain}, dl);
10508 }
10509
10510 ARMCC::CondCodes CondCode, CondCode2;
10511 FPCCToARMCC(CC, CondCode, CondCode2);
10512
10513 // FIXME: Chain is not handled correctly here. Currently the FPSCR is implicit
10514 // in CMPFP and CMPFPE, but instead it should be made explicit by these
10515 // instructions using a chain instead of glue. This would also fix the problem
10516 // here (and also in LowerSELECT_CC) where we generate two comparisons when
10517 // CondCode2 != AL.
10518 SDValue True = DAG.getConstant(1, dl, VT);
10519 SDValue False = DAG.getConstant(0, dl, VT);
10520 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
10521 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
10522 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10523 SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG);
10524 if (CondCode2 != ARMCC::AL) {
10525 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
10526 Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10527 Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG);
10528 }
10529 return DAG.getMergeValues({Result, Chain}, dl);
10530}
10531
10532SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {
10533 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10534
10535 EVT VT = getPointerTy(DAG.getDataLayout());
10536 SDLoc DL(Op);
10537 int FI = MFI.CreateFixedObject(4, 0, false);
10538 return DAG.getFrameIndex(FI, VT);
10539}
10540
10541SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10542 LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
10543 switch (Op.getOpcode()) {
10544 default: llvm_unreachable("Don't know how to custom lower this!");
10545 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
10546 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10547 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10548 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10549 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10550 case ISD::SELECT: return LowerSELECT(Op, DAG);
10551 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10552 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
10553 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
10554 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
10555 case ISD::VASTART: return LowerVASTART(Op, DAG);
10556 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
10557 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
10558 case ISD::SINT_TO_FP:
10559 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10560 case ISD::STRICT_FP_TO_SINT:
10561 case ISD::STRICT_FP_TO_UINT:
10562 case ISD::FP_TO_SINT:
10563 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
10564 case ISD::FP_TO_SINT_SAT:
10565 case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG, Subtarget);
10566 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
10567 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10568 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10569 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
10570 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
10571 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
10572 case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
10573 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
10574 Subtarget);
10575 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
10576 case ISD::SHL:
10577 case ISD::SRL:
10578 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
10579 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
10580 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
10581 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
10582 case ISD::SRL_PARTS:
10583 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
10584 case ISD::CTTZ:
10585 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
10586 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
10587 case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget);
10588 case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
10589 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
10590 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
10591 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
10592 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget);
10593 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10594 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
10595 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
10596 case ISD::TRUNCATE: return LowerTruncate(Op.getNode(), DAG, Subtarget);
10597 case ISD::SIGN_EXTEND:
10598 case ISD::ZERO_EXTEND: return LowerVectorExtend(Op.getNode(), DAG, Subtarget);
10599 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
10600 case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
10601 case ISD::SET_FPMODE:
10602 return LowerSET_FPMODE(Op, DAG);
10603 case ISD::RESET_FPMODE:
10604 return LowerRESET_FPMODE(Op, DAG);
10605 case ISD::MUL: return LowerMUL(Op, DAG);
10606 case ISD::SDIV:
10607 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10608 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
10609 return LowerSDIV(Op, DAG, Subtarget);
10610 case ISD::UDIV:
10611 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10612 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
10613 return LowerUDIV(Op, DAG, Subtarget);
10614 case ISD::UADDO_CARRY:
10615 case ISD::USUBO_CARRY:
10616 return LowerUADDSUBO_CARRY(Op, DAG);
10617 case ISD::SADDO:
10618 case ISD::SSUBO:
10619 return LowerSignedALUO(Op, DAG);
10620 case ISD::UADDO:
10621 case ISD::USUBO:
10622 return LowerUnsignedALUO(Op, DAG);
10623 case ISD::SADDSAT:
10624 case ISD::SSUBSAT:
10625 case ISD::UADDSAT:
10626 case ISD::USUBSAT:
10627 return LowerADDSUBSAT(Op, DAG, Subtarget);
10628 case ISD::LOAD:
10629 return LowerPredicateLoad(Op, DAG);
10630 case ISD::STORE:
10631 return LowerSTORE(Op, DAG, Subtarget);
10632 case ISD::MLOAD:
10633 return LowerMLOAD(Op, DAG);
10634 case ISD::VECREDUCE_MUL:
10635 case ISD::VECREDUCE_AND:
10636 case ISD::VECREDUCE_OR:
10637 case ISD::VECREDUCE_XOR:
10638 return LowerVecReduce(Op, DAG, Subtarget);
10639 case ISD::VECREDUCE_FADD:
10640 case ISD::VECREDUCE_FMUL:
10641 case ISD::VECREDUCE_FMIN:
10642 case ISD::VECREDUCE_FMAX:
10643 return LowerVecReduceF(Op, DAG, Subtarget);
10644 case ISD::VECREDUCE_UMIN:
10645 case ISD::VECREDUCE_UMAX:
10646 case ISD::VECREDUCE_SMIN:
10647 case ISD::VECREDUCE_SMAX:
10648 return LowerVecReduceMinMax(Op, DAG, Subtarget);
10649 case ISD::ATOMIC_LOAD:
10650 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
10651 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
10652 case ISD::SDIVREM:
10653 case ISD::UDIVREM: return LowerDivRem(Op, DAG);
10654 case ISD::DYNAMIC_STACKALLOC:
10655 if (Subtarget->isTargetWindows())
10656 return LowerDYNAMIC_STACKALLOC(Op, DAG);
10657 llvm_unreachable("Don't know how to custom lower this!");
10658 case ISD::STRICT_FP_ROUND:
10659 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
10660 case ISD::STRICT_FP_EXTEND:
10661 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10662 case ISD::STRICT_FSETCC:
10663 case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
10664 case ISD::SPONENTRY:
10665 return LowerSPONENTRY(Op, DAG);
10666 case ARMISD::WIN__DBZCHK: return SDValue();
10667 }
10668}
10669
10670static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
10671 SelectionDAG &DAG) {
10672 unsigned IntNo = N->getConstantOperandVal(0);
10673 unsigned Opc = 0;
10674 if (IntNo == Intrinsic::arm_smlald)
10675 Opc = ARMISD::SMLALD;
10676 else if (IntNo == Intrinsic::arm_smlaldx)
10677 Opc = ARMISD::SMLALDX;
10678 else if (IntNo == Intrinsic::arm_smlsld)
10679 Opc = ARMISD::SMLSLD;
10680 else if (IntNo == Intrinsic::arm_smlsldx)
10681 Opc = ARMISD::SMLSLDX;
10682 else
10683 return;
10684
10685 SDLoc dl(N);
10686 SDValue Lo, Hi;
10687 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(3), dl, MVT::i32, MVT::i32);
10688
10689 SDValue LongMul = DAG.getNode(Opc, dl,
10690 DAG.getVTList(MVT::i32, MVT::i32),
10691 N->getOperand(1), N->getOperand(2),
10692 Lo, Hi);
10693 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
10694 LongMul.getValue(0), LongMul.getValue(1)));
10695}
10696
10697/// ReplaceNodeResults - Replace the results of node with an illegal result
10698/// type with new values built out of custom code.
10699void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
10700 SmallVectorImpl<SDValue> &Results,
10701 SelectionDAG &DAG) const {
10702 SDValue Res;
10703 switch (N->getOpcode()) {
10704 default:
10705 llvm_unreachable("Don't know how to custom expand this!");
10706 case ISD::READ_REGISTER:
10708 break;
10709 case ISD::BITCAST:
10710 Res = ExpandBITCAST(N, DAG, Subtarget);
10711 break;
10712 case ISD::SRL:
10713 case ISD::SRA:
10714 case ISD::SHL:
10715 Res = Expand64BitShift(N, DAG, Subtarget);
10716 break;
10717 case ISD::SREM:
10718 case ISD::UREM:
10719 Res = LowerREM(N, DAG);
10720 break;
10721 case ISD::SDIVREM:
10722 case ISD::UDIVREM:
10723 Res = LowerDivRem(SDValue(N, 0), DAG);
10724 assert(Res.getNumOperands() == 2 && "DivRem needs two values");
10725 Results.push_back(Res.getValue(0));
10726 Results.push_back(Res.getValue(1));
10727 return;
10728 case ISD::SADDSAT:
10729 case ISD::SSUBSAT:
10730 case ISD::UADDSAT:
10731 case ISD::USUBSAT:
10732 Res = LowerADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
10733 break;
10734 case ISD::READCYCLECOUNTER:
10735 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
10736 return;
10737 case ISD::UDIV:
10738 case ISD::SDIV:
10739 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
10740 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
10741 Results);
10742 case ISD::ATOMIC_CMP_SWAP:
10743 ReplaceCMP_SWAP_64Results(N, Results, DAG);
10744 return;
10745 case ISD::INTRINSIC_WO_CHAIN:
10746 return ReplaceLongIntrinsic(N, Results, DAG);
10747 case ISD::LOAD:
10748 LowerLOAD(N, Results, DAG);
10749 break;
10750 case ISD::TRUNCATE:
10751 Res = LowerTruncate(N, DAG, Subtarget);
10752 break;
10753 case ISD::SIGN_EXTEND:
10754 case ISD::ZERO_EXTEND:
10755 Res = LowerVectorExtend(N, DAG, Subtarget);
10756 break;
10757 case ISD::FP_TO_SINT_SAT:
10758 case ISD::FP_TO_UINT_SAT:
10759 Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);
10760 break;
10761 }
10762 if (Res.getNode())
10763 Results.push_back(Res);
10764}
10765
10766//===----------------------------------------------------------------------===//
10767// ARM Scheduler Hooks
10768//===----------------------------------------------------------------------===//
10769
10770/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
10771/// registers the function context.
10772void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
10774 MachineBasicBlock *DispatchBB,
10775 int FI) const {
10776 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
10777 "ROPI/RWPI not currently supported with SjLj");
10778 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10779 DebugLoc dl = MI.getDebugLoc();
10780 MachineFunction *MF = MBB->getParent();
10784 const Function &F = MF->getFunction();
10785
10786 bool isThumb = Subtarget->isThumb();
10787 bool isThumb2 = Subtarget->isThumb2();
10788
10789 unsigned PCLabelId = AFI->createPICLabelUId();
10790 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
10792 ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
10793 unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
10794
10795 const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
10796 : &ARM::GPRRegClass;
10797
10798 // Grab constant pool and fixed stack memory operands.
10799 MachineMemOperand *CPMMO =
10802
10803 MachineMemOperand *FIMMOSt =
10806
10807 // Load the address of the dispatch MBB into the jump buffer.
10808 if (isThumb2) {
10809 // Incoming value: jbuf
10810 // ldr.n r5, LCPI1_1
10811 // orr r5, r5, #1
10812 // add r5, pc
10813 // str r5, [$jbuf, #+4] ; &jbuf[1]
10814 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10815 BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
10817 .addMemOperand(CPMMO)
10819 // Set the low bit because of thumb mode.
10820 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10821 BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
10822 .addReg(NewVReg1, RegState::Kill)
10823 .addImm(0x01)
10825 .add(condCodeOp());
10826 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10827 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
10828 .addReg(NewVReg2, RegState::Kill)
10829 .addImm(PCLabelId);
10830 BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
10831 .addReg(NewVReg3, RegState::Kill)
10832 .addFrameIndex(FI)
10833 .addImm(36) // &jbuf[1] :: pc
10834 .addMemOperand(FIMMOSt)
10836 } else if (isThumb) {
10837 // Incoming value: jbuf
10838 // ldr.n r1, LCPI1_4
10839 // add r1, pc
10840 // mov r2, #1
10841 // orrs r1, r2
10842 // add r2, $jbuf, #+4 ; &jbuf[1]
10843 // str r1, [r2]
10844 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10845 BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
10847 .addMemOperand(CPMMO)
10849 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10850 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
10851 .addReg(NewVReg1, RegState::Kill)
10852 .addImm(PCLabelId);
10853 // Set the low bit because of thumb mode.
10854 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10855 BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
10856 .addReg(ARM::CPSR, RegState::Define)
10857 .addImm(1)
10859 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10860 BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
10861 .addReg(ARM::CPSR, RegState::Define)
10862 .addReg(NewVReg2, RegState::Kill)
10863 .addReg(NewVReg3, RegState::Kill)
10865 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10866 BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
10867 .addFrameIndex(FI)
10868 .addImm(36); // &jbuf[1] :: pc
10869 BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
10870 .addReg(NewVReg4, RegState::Kill)
10871 .addReg(NewVReg5, RegState::Kill)
10872 .addImm(0)
10873 .addMemOperand(FIMMOSt)
10875 } else {
10876 // Incoming value: jbuf
10877 // ldr r1, LCPI1_1
10878 // add r1, pc, r1
10879 // str r1, [$jbuf, #+4] ; &jbuf[1]
10880 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10881 BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
10883 .addImm(0)
10884 .addMemOperand(CPMMO)
10886 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10887 BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
10888 .addReg(NewVReg1, RegState::Kill)
10889 .addImm(PCLabelId)
10891 BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
10892 .addReg(NewVReg2, RegState::Kill)
10893 .addFrameIndex(FI)
10894 .addImm(36) // &jbuf[1] :: pc
10895 .addMemOperand(FIMMOSt)
10897 }
10898}
10899
10900void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
10901 MachineBasicBlock *MBB) const {
10902 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10903 DebugLoc dl = MI.getDebugLoc();
10904 MachineFunction *MF = MBB->getParent();
10906 MachineFrameInfo &MFI = MF->getFrameInfo();
10907 int FI = MFI.getFunctionContextIndex();
10908
10909 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10910 : &ARM::GPRnopcRegClass;
10911
10912 // Get a mapping of the call site numbers to all of the landing pads they're
10913 // associated with.
10915 unsigned MaxCSNum = 0;
10916 for (MachineBasicBlock &BB : *MF) {
10917 if (!BB.isEHPad())
10918 continue;
10919
10920 // FIXME: We should assert that the EH_LABEL is the first MI in the landing
10921 // pad.
10922 for (MachineInstr &II : BB) {
10923 if (!II.isEHLabel())
10924 continue;
10925
10926 MCSymbol *Sym = II.getOperand(0).getMCSymbol();
10927 if (!MF->hasCallSiteLandingPad(Sym)) continue;
10928
10929 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
10930 for (unsigned Idx : CallSiteIdxs) {
10931 CallSiteNumToLPad[Idx].push_back(&BB);
10932 MaxCSNum = std::max(MaxCSNum, Idx);
10933 }
10934 break;
10935 }
10936 }
10937
10938 // Get an ordered list of the machine basic blocks for the jump table.
10939 std::vector<MachineBasicBlock*> LPadList;
10941 LPadList.reserve(CallSiteNumToLPad.size());
10942 for (unsigned I = 1; I <= MaxCSNum; ++I) {
10943 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
10944 for (MachineBasicBlock *MBB : MBBList) {
10945 LPadList.push_back(MBB);
10946 InvokeBBs.insert(MBB->pred_begin(), MBB->pred_end());
10947 }
10948 }
10949
10950 assert(!LPadList.empty() &&
10951 "No landing pad destinations for the dispatch jump table!");
10952
10953 // Create the jump table and associated information.
10955 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
10956 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
10957
10958 // Create the MBBs for the dispatch code.
10959
10960 // Shove the dispatch's address into the return slot in the function context.
10961 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
10962 DispatchBB->setIsEHPad();
10963
10964 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
10965 unsigned trap_opcode;
10966 if (Subtarget->isThumb())
10967 trap_opcode = ARM::tTRAP;
10968 else
10969 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
10970
10971 BuildMI(TrapBB, dl, TII->get(trap_opcode));
10972 DispatchBB->addSuccessor(TrapBB);
10973
10974 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
10975 DispatchBB->addSuccessor(DispContBB);
10976
10977 // Insert the new MBBs into the function.
10978 MF->insert(MF->end(), DispatchBB);
10979 MF->insert(MF->end(), DispContBB);
10980 MF->insert(MF->end(), TrapBB);
10981
10982 // Insert code into the entry block that creates and registers the function
10983 // context.
10984 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
10985
10986 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
10989
10991 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
10992
10993 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
10994 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
10995
10996 // Add a register mask with no preserved registers. This results in all
10997 // registers being marked as clobbered. This can't work if the dispatch block
10998 // is in a Thumb1 function and is linked with ARM code which uses the FP
10999 // registers, as there is no way to preserve the FP registers in Thumb1 mode.
11001
11002 bool IsPositionIndependent = isPositionIndependent();
11003 unsigned NumLPads = LPadList.size();
11004 if (Subtarget->isThumb2()) {
11005 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11006 BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
11007 .addFrameIndex(FI)
11008 .addImm(4)
11009 .addMemOperand(FIMMOLd)
11011
11012 if (NumLPads < 256) {
11013 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
11014 .addReg(NewVReg1)
11015 .addImm(LPadList.size())
11017 } else {
11018 Register VReg1 = MRI->createVirtualRegister(TRC);
11019 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
11020 .addImm(NumLPads & 0xFFFF)
11022
11023 unsigned VReg2 = VReg1;
11024 if ((NumLPads & 0xFFFF0000) != 0) {
11025 VReg2 = MRI->createVirtualRegister(TRC);
11026 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
11027 .addReg(VReg1)
11028 .addImm(NumLPads >> 16)
11030 }
11031
11032 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
11033 .addReg(NewVReg1)
11034 .addReg(VReg2)
11036 }
11037
11038 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
11039 .addMBB(TrapBB)
11041 .addReg(ARM::CPSR);
11042
11043 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11044 BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
11045 .addJumpTableIndex(MJTI)
11047
11048 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11049 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
11050 .addReg(NewVReg3, RegState::Kill)
11051 .addReg(NewVReg1)
11054 .add(condCodeOp());
11055
11056 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
11057 .addReg(NewVReg4, RegState::Kill)
11058 .addReg(NewVReg1)
11059 .addJumpTableIndex(MJTI);
11060 } else if (Subtarget->isThumb()) {
11061 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11062 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
11063 .addFrameIndex(FI)
11064 .addImm(1)
11065 .addMemOperand(FIMMOLd)
11067
11068 if (NumLPads < 256) {
11069 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
11070 .addReg(NewVReg1)
11071 .addImm(NumLPads)
11073 } else {
11074 MachineConstantPool *ConstantPool = MF->getConstantPool();
11075 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11076 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11077
11078 // MachineConstantPool wants an explicit alignment.
11079 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11080 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11081
11082 Register VReg1 = MRI->createVirtualRegister(TRC);
11083 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
11084 .addReg(VReg1, RegState::Define)
11087 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
11088 .addReg(NewVReg1)
11089 .addReg(VReg1)
11091 }
11092
11093 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
11094 .addMBB(TrapBB)
11096 .addReg(ARM::CPSR);
11097
11098 Register NewVReg2 = MRI->createVirtualRegister(TRC);
11099 BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
11100 .addReg(ARM::CPSR, RegState::Define)
11101 .addReg(NewVReg1)
11102 .addImm(2)
11104
11105 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11106 BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
11107 .addJumpTableIndex(MJTI)
11109
11110 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11111 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
11112 .addReg(ARM::CPSR, RegState::Define)
11113 .addReg(NewVReg2, RegState::Kill)
11114 .addReg(NewVReg3)
11116
11117 MachineMemOperand *JTMMOLd =
11118 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11120
11121 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11122 BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
11123 .addReg(NewVReg4, RegState::Kill)
11124 .addImm(0)
11125 .addMemOperand(JTMMOLd)
11127
11128 unsigned NewVReg6 = NewVReg5;
11129 if (IsPositionIndependent) {
11130 NewVReg6 = MRI->createVirtualRegister(TRC);
11131 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
11132 .addReg(ARM::CPSR, RegState::Define)
11133 .addReg(NewVReg5, RegState::Kill)
11134 .addReg(NewVReg3)
11136 }
11137
11138 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
11139 .addReg(NewVReg6, RegState::Kill)
11140 .addJumpTableIndex(MJTI);
11141 } else {
11142 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11143 BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
11144 .addFrameIndex(FI)
11145 .addImm(4)
11146 .addMemOperand(FIMMOLd)
11148
11149 if (NumLPads < 256) {
11150 BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
11151 .addReg(NewVReg1)
11152 .addImm(NumLPads)
11154 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
11155 Register VReg1 = MRI->createVirtualRegister(TRC);
11156 BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
11157 .addImm(NumLPads & 0xFFFF)
11159
11160 unsigned VReg2 = VReg1;
11161 if ((NumLPads & 0xFFFF0000) != 0) {
11162 VReg2 = MRI->createVirtualRegister(TRC);
11163 BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
11164 .addReg(VReg1)
11165 .addImm(NumLPads >> 16)
11167 }
11168
11169 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11170 .addReg(NewVReg1)
11171 .addReg(VReg2)
11173 } else {
11174 MachineConstantPool *ConstantPool = MF->getConstantPool();
11175 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11176 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11177
11178 // MachineConstantPool wants an explicit alignment.
11179 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11180 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11181
11182 Register VReg1 = MRI->createVirtualRegister(TRC);
11183 BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
11184 .addReg(VReg1, RegState::Define)
11186 .addImm(0)
11188 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11189 .addReg(NewVReg1)
11190 .addReg(VReg1, RegState::Kill)
11192 }
11193
11194 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
11195 .addMBB(TrapBB)
11197 .addReg(ARM::CPSR);
11198
11199 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11200 BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
11201 .addReg(NewVReg1)
11204 .add(condCodeOp());
11205 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11206 BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
11207 .addJumpTableIndex(MJTI)
11209
11210 MachineMemOperand *JTMMOLd =
11211 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11213 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11214 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
11215 .addReg(NewVReg3, RegState::Kill)
11216 .addReg(NewVReg4)
11217 .addImm(0)
11218 .addMemOperand(JTMMOLd)
11220
11221 if (IsPositionIndependent) {
11222 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
11223 .addReg(NewVReg5, RegState::Kill)
11224 .addReg(NewVReg4)
11225 .addJumpTableIndex(MJTI);
11226 } else {
11227 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
11228 .addReg(NewVReg5, RegState::Kill)
11229 .addJumpTableIndex(MJTI);
11230 }
11231 }
11232
11233 // Add the jump table entries as successors to the MBB.
11235 for (MachineBasicBlock *CurMBB : LPadList) {
11236 if (SeenMBBs.insert(CurMBB).second)
11237 DispContBB->addSuccessor(CurMBB);
11238 }
11239
11240 // N.B. the order the invoke BBs are processed in doesn't matter here.
11241 const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
11243 for (MachineBasicBlock *BB : InvokeBBs) {
11244
11245 // Remove the landing pad successor from the invoke block and replace it
11246 // with the new dispatch block.
11247 SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
11248 while (!Successors.empty()) {
11249 MachineBasicBlock *SMBB = Successors.pop_back_val();
11250 if (SMBB->isEHPad()) {
11251 BB->removeSuccessor(SMBB);
11252 MBBLPads.push_back(SMBB);
11253 }
11254 }
11255
11256 BB->addSuccessor(DispatchBB, BranchProbability::getZero());
11257 BB->normalizeSuccProbs();
11258
11259 // Find the invoke call and mark all of the callee-saved registers as
11260 // 'implicit defined' so that they're spilled. This prevents code from
11261 // moving instructions to before the EH block, where they will never be
11262 // executed.
11263 for (MachineBasicBlock::reverse_iterator
11264 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
11265 if (!II->isCall()) continue;
11266
11268 for (MachineInstr::mop_iterator
11269 OI = II->operands_begin(), OE = II->operands_end();
11270 OI != OE; ++OI) {
11271 if (!OI->isReg()) continue;
11272 DefRegs[OI->getReg()] = true;
11273 }
11274
11275 MachineInstrBuilder MIB(*MF, &*II);
11276
11277 for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
11278 unsigned Reg = SavedRegs[i];
11279 if (Subtarget->isThumb2() &&
11280 !ARM::tGPRRegClass.contains(Reg) &&
11281 !ARM::hGPRRegClass.contains(Reg))
11282 continue;
11283 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
11284 continue;
11285 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
11286 continue;
11287 if (!DefRegs[Reg])
11288 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
11289 }
11290
11291 break;
11292 }
11293 }
11294
11295 // Mark all former landing pads as non-landing pads. The dispatch is the only
11296 // landing pad now.
11297 for (MachineBasicBlock *MBBLPad : MBBLPads)
11298 MBBLPad->setIsEHPad(false);
11299
11300 // The instruction is gone now.
11301 MI.eraseFromParent();
11302}
11303
11304static
11305MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
11306 for (MachineBasicBlock *S : MBB->successors())
11307 if (S != Succ)
11308 return S;
11309 llvm_unreachable("Expecting a BB with two successors!");
11310}
11311
11312/// Return the load opcode for a given load size. If the load size is >= 8,
11313/// a NEON opcode will be returned.
11314static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
11315 if (LdSize >= 8)
11316 return LdSize == 16 ? ARM::VLD1q32wb_fixed
11317 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
11318 if (IsThumb1)
11319 return LdSize == 4 ? ARM::tLDRi
11320 : LdSize == 2 ? ARM::tLDRHi
11321 : LdSize == 1 ? ARM::tLDRBi : 0;
11322 if (IsThumb2)
11323 return LdSize == 4 ? ARM::t2LDR_POST
11324 : LdSize == 2 ? ARM::t2LDRH_POST
11325 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
11326 return LdSize == 4 ? ARM::LDR_POST_IMM
11327 : LdSize == 2 ? ARM::LDRH_POST
11328 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
11329}
11330
11331/// Return the store opcode for a given store size. If the store size is >= 8,
11332/// a NEON opcode will be returned.
11333static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
11334 if (StSize >= 8)
11335 return StSize == 16 ? ARM::VST1q32wb_fixed
11336 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
11337 if (IsThumb1)
11338 return StSize == 4 ? ARM::tSTRi
11339 : StSize == 2 ? ARM::tSTRHi
11340 : StSize == 1 ? ARM::tSTRBi : 0;
11341 if (IsThumb2)
11342 return StSize == 4 ? ARM::t2STR_POST
11343 : StSize == 2 ? ARM::t2STRH_POST
11344 : StSize == 1 ? ARM::t2STRB_POST : 0;
11345 return StSize == 4 ? ARM::STR_POST_IMM
11346 : StSize == 2 ? ARM::STRH_POST
11347 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
11348}
11349
11350/// Emit a post-increment load operation with given size. The instructions
11351/// will be added to BB at Pos.
11353 const TargetInstrInfo *TII, const DebugLoc &dl,
11354 unsigned LdSize, unsigned Data, unsigned AddrIn,
11355 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11356 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
11357 assert(LdOpc != 0 && "Should have a load opcode");
11358 if (LdSize >= 8) {
11359 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11360 .addReg(AddrOut, RegState::Define)
11361 .addReg(AddrIn)
11362 .addImm(0)
11364 } else if (IsThumb1) {
11365 // load + update AddrIn
11366 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11367 .addReg(AddrIn)
11368 .addImm(0)
11370 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11371 .add(t1CondCodeOp())
11372 .addReg(AddrIn)
11373 .addImm(LdSize)
11375 } else if (IsThumb2) {
11376 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11377 .addReg(AddrOut, RegState::Define)
11378 .addReg(AddrIn)
11379 .addImm(LdSize)
11381 } else { // arm
11382 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11383 .addReg(AddrOut, RegState::Define)
11384 .addReg(AddrIn)
11385 .addReg(0)
11386 .addImm(LdSize)
11388 }
11389}
11390
11391/// Emit a post-increment store operation with given size. The instructions
11392/// will be added to BB at Pos.
11394 const TargetInstrInfo *TII, const DebugLoc &dl,
11395 unsigned StSize, unsigned Data, unsigned AddrIn,
11396 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11397 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
11398 assert(StOpc != 0 && "Should have a store opcode");
11399 if (StSize >= 8) {
11400 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11401 .addReg(AddrIn)
11402 .addImm(0)
11403 .addReg(Data)
11405 } else if (IsThumb1) {
11406 // store + update AddrIn
11407 BuildMI(*BB, Pos, dl, TII->get(StOpc))
11408 .addReg(Data)
11409 .addReg(AddrIn)
11410 .addImm(0)
11412 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11413 .add(t1CondCodeOp())
11414 .addReg(AddrIn)
11415 .addImm(StSize)
11417 } else if (IsThumb2) {
11418 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11419 .addReg(Data)
11420 .addReg(AddrIn)
11421 .addImm(StSize)
11423 } else { // arm
11424 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11425 .addReg(Data)
11426 .addReg(AddrIn)
11427 .addReg(0)
11428 .addImm(StSize)
11430 }
11431}
11432
11434ARMTargetLowering::EmitStructByval(MachineInstr &MI,
11435 MachineBasicBlock *BB) const {
11436 // This pseudo instruction has 3 operands: dst, src, size
11437 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
11438 // Otherwise, we will generate unrolled scalar copies.
11439 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11440 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11442
11443 Register dest = MI.getOperand(0).getReg();
11444 Register src = MI.getOperand(1).getReg();
11445 unsigned SizeVal = MI.getOperand(2).getImm();
11446 unsigned Alignment = MI.getOperand(3).getImm();
11447 DebugLoc dl = MI.getDebugLoc();
11448
11449 MachineFunction *MF = BB->getParent();
11451 unsigned UnitSize = 0;
11452 const TargetRegisterClass *TRC = nullptr;
11453 const TargetRegisterClass *VecTRC = nullptr;
11454
11455 bool IsThumb1 = Subtarget->isThumb1Only();
11456 bool IsThumb2 = Subtarget->isThumb2();
11457 bool IsThumb = Subtarget->isThumb();
11458
11459 if (Alignment & 1) {
11460 UnitSize = 1;
11461 } else if (Alignment & 2) {
11462 UnitSize = 2;
11463 } else {
11464 // Check whether we can use NEON instructions.
11465 if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
11466 Subtarget->hasNEON()) {
11467 if ((Alignment % 16 == 0) && SizeVal >= 16)
11468 UnitSize = 16;
11469 else if ((Alignment % 8 == 0) && SizeVal >= 8)
11470 UnitSize = 8;
11471 }
11472 // Can't use NEON instructions.
11473 if (UnitSize == 0)
11474 UnitSize = 4;
11475 }
11476
11477 // Select the correct opcode and register class for unit size load/store
11478 bool IsNeon = UnitSize >= 8;
11479 TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
11480 if (IsNeon)
11481 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
11482 : UnitSize == 8 ? &ARM::DPRRegClass
11483 : nullptr;
11484
11485 unsigned BytesLeft = SizeVal % UnitSize;
11486 unsigned LoopSize = SizeVal - BytesLeft;
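// Worked example (sizes assumed for illustration): a 10-byte copy with
// Alignment == 4 picks UnitSize == 4, giving LoopSize == 8 and BytesLeft == 2,
// i.e. two word-sized copies below followed by two byte-sized tail copies.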
11487
11488 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11489 // Use LDR and STR to copy.
11490 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
11491 // [destOut] = STR_POST(scratch, destIn, UnitSize)
11492 unsigned srcIn = src;
11493 unsigned destIn = dest;
11494 for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
11495 Register srcOut = MRI.createVirtualRegister(TRC);
11496 Register destOut = MRI.createVirtualRegister(TRC);
11497 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11498 emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
11499 IsThumb1, IsThumb2);
11500 emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
11501 IsThumb1, IsThumb2);
11502 srcIn = srcOut;
11503 destIn = destOut;
11504 }
11505
11506 // Handle the leftover bytes with LDRB and STRB.
11507 // [scratch, srcOut] = LDRB_POST(srcIn, 1)
11508 // [destOut] = STRB_POST(scratch, destIn, 1)
11509 for (unsigned i = 0; i < BytesLeft; i++) {
11510 Register srcOut = MRI.createVirtualRegister(TRC);
11511 Register destOut = MRI.createVirtualRegister(TRC);
11512 Register scratch = MRI.createVirtualRegister(TRC);
11513 emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
11514 IsThumb1, IsThumb2);
11515 emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
11516 IsThumb1, IsThumb2);
11517 srcIn = srcOut;
11518 destIn = destOut;
11519 }
11520 MI.eraseFromParent(); // The instruction is gone now.
11521 return BB;
11522 }
11523
11524 // Expand the pseudo op to a loop.
11525 // thisMBB:
11526 // ...
11527 // movw varEnd, # --> with thumb2
11528 // movt varEnd, #
11529 // ldrcp varEnd, idx --> without thumb2
11530 // fallthrough --> loopMBB
11531 // loopMBB:
11532 // PHI varPhi, varEnd, varLoop
11533 // PHI srcPhi, src, srcLoop
11534 // PHI destPhi, dst, destLoop
11535 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11536 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11537 // subs varLoop, varPhi, #UnitSize
11538 // bne loopMBB
11539 // fallthrough --> exitMBB
11540 // exitMBB:
11541 // epilogue to handle left-over bytes
11542 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11543 // [destOut] = STRB_POST(scratch, destLoop, 1)
11544 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11545 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11546 MF->insert(It, loopMBB);
11547 MF->insert(It, exitMBB);
11548
11549 // Set the call frame size on entry to the new basic blocks.
11550 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
11551 loopMBB->setCallFrameSize(CallFrameSize);
11552 exitMBB->setCallFrameSize(CallFrameSize);
11553
11554 // Transfer the remainder of BB and its successor edges to exitMBB.
11555 exitMBB->splice(exitMBB->begin(), BB,
11556 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11557 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11558
11559 // Load an immediate to varEnd.
11560 Register varEnd = MRI.createVirtualRegister(TRC);
11561 if (Subtarget->useMovt()) {
11562 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi32imm : ARM::MOVi32imm),
11563 varEnd)
11564 .addImm(LoopSize);
11565 } else if (Subtarget->genExecuteOnly()) {
11566 assert(IsThumb && "Non-thumb expected to have used movt");
11567 BuildMI(BB, dl, TII->get(ARM::tMOVi32imm), varEnd).addImm(LoopSize);
11568 } else {
11569 MachineConstantPool *ConstantPool = MF->getConstantPool();
11570 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11571 const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
11572
11573 // MachineConstantPool wants an explicit alignment.
11574 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11575 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11576 MachineMemOperand *CPMMO =
11577 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
11578 MachineMemOperand::MOLoad, 4, Align(4));
11579
11580 if (IsThumb)
11581 BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
11582 .addReg(varEnd, RegState::Define)
11583 .addConstantPoolIndex(Idx)
11584 .add(predOps(ARMCC::AL))
11585 .addMemOperand(CPMMO);
11586 else
11587 BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
11588 .addReg(varEnd, RegState::Define)
11589 .addConstantPoolIndex(Idx)
11590 .addImm(0)
11591 .add(predOps(ARMCC::AL))
11592 .addMemOperand(CPMMO);
11593 }
11594 BB->addSuccessor(loopMBB);
11595
11596 // Generate the loop body:
11597 // varPhi = PHI(varLoop, varEnd)
11598 // srcPhi = PHI(srcLoop, src)
11599 // destPhi = PHI(destLoop, dst)
11600 MachineBasicBlock *entryBB = BB;
11601 BB = loopMBB;
11602 Register varLoop = MRI.createVirtualRegister(TRC);
11603 Register varPhi = MRI.createVirtualRegister(TRC);
11604 Register srcLoop = MRI.createVirtualRegister(TRC);
11605 Register srcPhi = MRI.createVirtualRegister(TRC);
11606 Register destLoop = MRI.createVirtualRegister(TRC);
11607 Register destPhi = MRI.createVirtualRegister(TRC);
11608
11609 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
11610 .addReg(varLoop).addMBB(loopMBB)
11611 .addReg(varEnd).addMBB(entryBB);
11612 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
11613 .addReg(srcLoop).addMBB(loopMBB)
11614 .addReg(src).addMBB(entryBB);
11615 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
11616 .addReg(destLoop).addMBB(loopMBB)
11617 .addReg(dest).addMBB(entryBB);
11618
11619 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11620 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11621 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11622 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
11623 IsThumb1, IsThumb2);
11624 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
11625 IsThumb1, IsThumb2);
11626
11627 // Decrement loop variable by UnitSize.
11628 if (IsThumb1) {
11629 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
11630 .add(t1CondCodeOp())
11631 .addReg(varPhi)
11632 .addImm(UnitSize)
11633 .add(predOps(ARMCC::AL));
11634 } else {
11635 MachineInstrBuilder MIB =
11636 BuildMI(*BB, BB->end(), dl,
11637 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11638 MIB.addReg(varPhi)
11639 .addImm(UnitSize)
11640 .add(predOps(ARMCC::AL))
11641 .add(condCodeOp());
11642 MIB->getOperand(5).setReg(ARM::CPSR);
11643 MIB->getOperand(5).setIsDef(true);
11644 }
11645 BuildMI(*BB, BB->end(), dl,
11646 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11647 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
11648
11649 // loopMBB can loop back to loopMBB or fall through to exitMBB.
11650 BB->addSuccessor(loopMBB);
11651 BB->addSuccessor(exitMBB);
11652
11653 // Add epilogue to handle BytesLeft.
11654 BB = exitMBB;
11655 auto StartOfExit = exitMBB->begin();
11656
11657 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11658 // [destOut] = STRB_POST(scratch, destLoop, 1)
11659 unsigned srcIn = srcLoop;
11660 unsigned destIn = destLoop;
11661 for (unsigned i = 0; i < BytesLeft; i++) {
11662 Register srcOut = MRI.createVirtualRegister(TRC);
11663 Register destOut = MRI.createVirtualRegister(TRC);
11664 Register scratch = MRI.createVirtualRegister(TRC);
11665 emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
11666 IsThumb1, IsThumb2);
11667 emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
11668 IsThumb1, IsThumb2);
11669 srcIn = srcOut;
11670 destIn = destOut;
11671 }
11672
11673 MI.eraseFromParent(); // The instruction is gone now.
11674 return BB;
11675}
11676
11677 MachineBasicBlock *
11678 ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
11679 MachineBasicBlock *MBB) const {
11680 const TargetMachine &TM = getTargetMachine();
11681 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
11682 DebugLoc DL = MI.getDebugLoc();
11683
11684 assert(Subtarget->isTargetWindows() &&
11685 "__chkstk is only supported on Windows");
11686 assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
11687
11688 // __chkstk takes the number of words to allocate on the stack in R4, and
11689 // returns the stack adjustment in number of bytes in R4. This will not
11690 // clobber any other registers (other than the obvious lr).
11691 //
11692 // Although, technically, IP should be considered a register which may be
11693 // clobbered, the call itself will not touch it. Windows on ARM is a pure
11694 // thumb-2 environment, so there is no interworking required. As a result, we
11695 // do not expect a veneer to be emitted by the linker, clobbering IP.
11696 //
11697 // Each module receives its own copy of __chkstk, so no import thunk is
11698 // required, again, ensuring that IP is not clobbered.
11699 //
11700 // Finally, although some linkers may theoretically provide a trampoline for
11701 // out of range calls (which is quite common due to a 32M range limitation of
11702 // branches for Thumb), we can generate the long-call version via
11703 // -mcmodel=large, alleviating the need for the trampoline which may clobber
11704 // IP.
11705
11706 switch (TM.getCodeModel()) {
11707 case CodeModel::Tiny:
11708 llvm_unreachable("Tiny code model not available on ARM.");
11709 case CodeModel::Small:
11710 case CodeModel::Medium:
11711 case CodeModel::Kernel:
11712 BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
11713 .add(predOps(ARMCC::AL))
11714 .addExternalSymbol("__chkstk")
11715 .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
11716 .addReg(ARM::R4, RegState::Implicit | RegState::Define)
11717 .addReg(ARM::R12,
11718 RegState::Implicit | RegState::Define | RegState::Dead)
11719 .addReg(ARM::CPSR,
11720 RegState::Implicit | RegState::Define | RegState::Dead);
11721 break;
11722 case CodeModel::Large: {
11723 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
11724 Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11725
11726 BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
11727 .addExternalSymbol("__chkstk");
11728 BuildMI(*MBB, MI, DL, TII.get(gettBLXrOpcode(*MBB->getParent())))
11729 .add(predOps(ARMCC::AL))
11730 .addReg(Reg, RegState::Kill)
11731 .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
11732 .addReg(ARM::R4, RegState::Implicit | RegState::Define)
11733 .addReg(ARM::R12,
11734 RegState::Implicit | RegState::Define | RegState::Dead)
11735 .addReg(ARM::CPSR,
11736 RegState::Implicit | RegState::Define | RegState::Dead);
11737 break;
11738 }
11739 }
11740
11741 BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
11742 .addReg(ARM::SP, RegState::Kill)
11743 .addReg(ARM::R4, RegState::Kill)
11744 .setMIFlags(MachineInstr::FrameSetup)
11745 .add(predOps(ARMCC::AL))
11746 .add(condCodeOp());
11747
11748 MI.eraseFromParent();
11749 return MBB;
11750}
11751
11752 MachineBasicBlock *
11753 ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
11754 MachineBasicBlock *MBB) const {
11755 DebugLoc DL = MI.getDebugLoc();
11756 MachineFunction *MF = MBB->getParent();
11757 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11758
11759 MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
11760 MF->insert(++MBB->getIterator(), ContBB);
11761 ContBB->splice(ContBB->begin(), MBB,
11762 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11763 ContBB->transferSuccessorsAndUpdatePHIs(MBB);
11764 MBB->addSuccessor(ContBB);
11765
11766 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
11767 BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
11768 MF->push_back(TrapBB);
11769 MBB->addSuccessor(TrapBB);
11770
11771 BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
11772 .addReg(MI.getOperand(0).getReg())
11773 .addImm(0)
11774 .add(predOps(ARMCC::AL));
11775 BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
11776 .addMBB(TrapBB)
11777 .addImm(ARMCC::EQ)
11778 .addReg(ARM::CPSR);
11779
11780 MI.eraseFromParent();
11781 return ContBB;
11782}
11783
11784// The CPSR operand of SelectItr might be missing a kill marker
11785// because there were multiple uses of CPSR, and ISel didn't know
11786// which to mark. Figure out whether SelectItr should have had a
11787// kill marker, and set it if it should. Returns the correct kill
11788// marker value.
11789 static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
11790 MachineBasicBlock* BB,
11791 const TargetRegisterInfo* TRI) {
11792 // Scan forward through BB for a use/def of CPSR.
11793 MachineBasicBlock::iterator miI(std::next(SelectItr));
11794 for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
11795 const MachineInstr& mi = *miI;
11796 if (mi.readsRegister(ARM::CPSR, /*TRI=*/nullptr))
11797 return false;
11798 if (mi.definesRegister(ARM::CPSR, /*TRI=*/nullptr))
11799 break; // Should have kill-flag - update below.
11800 }
11801
11802 // If we hit the end of the block, check whether CPSR is live into a
11803 // successor.
11804 if (miI == BB->end()) {
11805 for (MachineBasicBlock *Succ : BB->successors())
11806 if (Succ->isLiveIn(ARM::CPSR))
11807 return false;
11808 }
11809
11810 // We found a def, or hit the end of the basic block and CPSR wasn't live
11811 // out. SelectMI should have a kill flag on CPSR.
11812 SelectItr->addRegisterKilled(ARM::CPSR, TRI);
11813 return true;
11814}
11815
11816/// Adds logic in loop entry MBB to calculate loop iteration count and adds
11817/// t2WhileLoopSetup and t2WhileLoopStart to generate WLS loop
11818 static Register genTPEntry(MachineBasicBlock *TpEntry,
11819 MachineBasicBlock *TpLoopBody,
11820 MachineBasicBlock *TpExit, Register OpSizeReg,
11821 const TargetInstrInfo *TII, DebugLoc Dl,
11822 MachineRegisterInfo &MRI) {
11823 // Calculates loop iteration count = ceil(n/16) = (n + 15) >> 4.
11824 Register AddDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11825 BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)
11826 .addUse(OpSizeReg)
11827 .addImm(15)
11828 .add(predOps(ARMCC::AL))
11829 .addReg(0);
11830
11831 Register LsrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11832 BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)
11833 .addUse(AddDestReg, RegState::Kill)
11834 .addImm(4)
11835 .add(predOps(ARMCC::AL))
11836 .addReg(0);
11837
11838 Register TotalIterationsReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11839 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
11840 .addUse(LsrDestReg, RegState::Kill);
11841
11842 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopStart))
11843 .addUse(TotalIterationsReg)
11844 .addMBB(TpExit);
11845
11846 BuildMI(TpEntry, Dl, TII->get(ARM::t2B))
11847 .addMBB(TpLoopBody)
11848 .add(predOps(ARMCC::AL));
11849
11850 return TotalIterationsReg;
11851}
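// Worked example (illustrative): for an element count n == 37 the sequence
// above computes (37 + 15) >> 4 == 52 >> 4 == 3 == ceil(37 / 16) iterations,
// which t2WhileLoopSetup/t2WhileLoopStart use as the WLS trip count; n == 0
// yields zero iterations and branches straight to TpExit.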
11852
11853/// Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and
11854/// t2DoLoopEnd. These are used by later passes to generate tail predicated
11855/// loops.
11856static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
11857 MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit,
11858 const TargetInstrInfo *TII, DebugLoc Dl,
11859 MachineRegisterInfo &MRI, Register OpSrcReg,
11860 Register OpDestReg, Register ElementCountReg,
11861 Register TotalIterationsReg, bool IsMemcpy) {
11862 // First insert 4 PHI nodes for: Current pointer to Src (if memcpy), Dest
11863 // array, loop iteration counter, predication counter.
11864
11865 Register SrcPhiReg, CurrSrcReg;
11866 if (IsMemcpy) {
11867 // Current position in the src array
11868 SrcPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11869 CurrSrcReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11870 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), SrcPhiReg)
11871 .addUse(OpSrcReg)
11872 .addMBB(TpEntry)
11873 .addUse(CurrSrcReg)
11874 .addMBB(TpLoopBody);
11875 }
11876
11877 // Current position in the dest array
11878 Register DestPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11879 Register CurrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11880 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), DestPhiReg)
11881 .addUse(OpDestReg)
11882 .addMBB(TpEntry)
11883 .addUse(CurrDestReg)
11884 .addMBB(TpLoopBody);
11885
11886 // Current loop counter
11887 Register LoopCounterPhiReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11888 Register RemainingLoopIterationsReg =
11889 MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11890 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), LoopCounterPhiReg)
11891 .addUse(TotalIterationsReg)
11892 .addMBB(TpEntry)
11893 .addUse(RemainingLoopIterationsReg)
11894 .addMBB(TpLoopBody);
11895
11896 // Predication counter
11897 Register PredCounterPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11898 Register RemainingElementsReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11899 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), PredCounterPhiReg)
11900 .addUse(ElementCountReg)
11901 .addMBB(TpEntry)
11902 .addUse(RemainingElementsReg)
11903 .addMBB(TpLoopBody);
11904
11905 // Pass predication counter to VCTP
11906 Register VccrReg = MRI.createVirtualRegister(&ARM::VCCRRegClass);
11907 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
11908 .addUse(PredCounterPhiReg)
11909 .addImm(0)
11910 .addReg(0)
11911 .addReg(0);
11912
11913 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
11914 .addUse(PredCounterPhiReg)
11915 .addImm(16)
11916 .add(predOps(ARMCC::AL))
11917 .addReg(0);
11918
11919 // VLDRB (only if memcpy) and VSTRB instructions, predicated using VPR
11920 Register SrcValueReg;
11921 if (IsMemcpy) {
11922 SrcValueReg = MRI.createVirtualRegister(&ARM::MQPRRegClass);
11923 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VLDRBU8_post))
11924 .addDef(CurrSrcReg)
11925 .addDef(SrcValueReg)
11926 .addReg(SrcPhiReg)
11927 .addImm(16)
11928 .addImm(ARMVCC::Then)
11929 .addUse(VccrReg)
11930 .addReg(0);
11931 } else
11932 SrcValueReg = OpSrcReg;
11933
11934 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VSTRBU8_post))
11935 .addDef(CurrDestReg)
11936 .addUse(SrcValueReg)
11937 .addReg(DestPhiReg)
11938 .addImm(16)
11939 .addImm(ARMVCC::Then)
11940 .addUse(VccrReg)
11941 .addReg(0);
11942
11943 // Add the pseudoInstrs for decrementing the loop counter and marking the
11944 // end:t2DoLoopDec and t2DoLoopEnd
11945 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
11946 .addUse(LoopCounterPhiReg)
11947 .addImm(1);
11948
11949 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopEnd))
11950 .addUse(RemainingLoopIterationsReg)
11951 .addMBB(TpLoopBody);
11952
11953 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2B))
11954 .addMBB(TpExit)
11955 .add(predOps(ARMCC::AL));
11956}
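// Illustrative sketch of the intended end result (mnemonics and register
// assignment assumed; the conversion is performed by later MVE passes, not by
// this function): the VCTP8/t2LoopDec/t2LoopEnd pattern built above is meant
// to become a tail-predicated low-overhead loop along the lines of
//   wlstp.8  lr, r2, .Lexit      @ r2 = element count
// .Lloop:
//   vldrb.u8 q0, [r1], #16       @ predicated load (memcpy only)
//   vstrb.8  q0, [r0], #16       @ predicated store
//   letp     lr, .Lloop
// .Lexit: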
11957
11958 MachineBasicBlock *
11959 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11960 MachineBasicBlock *BB) const {
11961 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11962 DebugLoc dl = MI.getDebugLoc();
11963 bool isThumb2 = Subtarget->isThumb2();
11964 switch (MI.getOpcode()) {
11965 default: {
11966 MI.print(errs());
11967 llvm_unreachable("Unexpected instr type to insert");
11968 }
11969
11970 // Thumb1 post-indexed loads are really just single-register LDMs.
11971 case ARM::tLDR_postidx: {
11972 MachineOperand Def(MI.getOperand(1));
11973 BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
11974 .add(Def) // Rn_wb
11975 .add(MI.getOperand(2)) // Rn
11976 .add(MI.getOperand(3)) // PredImm
11977 .add(MI.getOperand(4)) // PredReg
11978 .add(MI.getOperand(0)) // Rt
11979 .cloneMemRefs(MI);
11980 MI.eraseFromParent();
11981 return BB;
11982 }
11983
11984 case ARM::MVE_MEMCPYLOOPINST:
11985 case ARM::MVE_MEMSETLOOPINST: {
11986
11987 // Transformation below expands MVE_MEMCPYLOOPINST/MVE_MEMSETLOOPINST Pseudo
11988 // into a Tail Predicated (TP) Loop. It adds the instructions to calculate
11989 // the iteration count = ceil(size_in_bytes / 16) in the TP entry block and
11990 // adds the relevant instructions in the TP loop Body for generation of a
11991 // WLSTP loop.
11992
11993 // Below is relevant portion of the CFG after the transformation.
11994 // The Machine Basic Blocks are shown along with branch conditions (in
11995 // brackets). Note that TP entry/exit MBBs depict the entry/exit of this
11996 // portion of the CFG and may not necessarily be the entry/exit of the
11997 // function.
11998
11999 // (Relevant) CFG after transformation:
12000 // TP entry MBB
12001 // |
12002 // |-----------------|
12003 // (n <= 0) (n > 0)
12004 // | |
12005 // | TP loop Body MBB<--|
12006 // | | |
12007 // \ |___________|
12008 // \ /
12009 // TP exit MBB
12010
12011 MachineFunction *MF = BB->getParent();
12012 MachineFunctionProperties &Properties = MF->getProperties();
12013 MachineRegisterInfo &MRI = MF->getRegInfo();
12014
12015 Register OpDestReg = MI.getOperand(0).getReg();
12016 Register OpSrcReg = MI.getOperand(1).getReg();
12017 Register OpSizeReg = MI.getOperand(2).getReg();
12018
12019 // Allocate the required MBBs and add to parent function.
12020 MachineBasicBlock *TpEntry = BB;
12021 MachineBasicBlock *TpLoopBody = MF->CreateMachineBasicBlock();
12022 MachineBasicBlock *TpExit;
12023
12024 MF->push_back(TpLoopBody);
12025
12026 // If any instructions are present in the current block after
12027 // MVE_MEMCPYLOOPINST or MVE_MEMSETLOOPINST, split the current block and
12028 // move the instructions into the newly created exit block. If there are no
12029 // instructions add an explicit branch to the FallThrough block and then
12030 // split.
12031 //
12032 // The split is required for two reasons:
12033 // 1) A terminator(t2WhileLoopStart) will be placed at that site.
12034 // 2) Since a TPLoopBody will be added later, any phis in successive blocks
12035 // need to be updated. splitAt() already handles this.
12036 TpExit = BB->splitAt(MI, false);
12037 if (TpExit == BB) {
12038 assert(BB->canFallThrough() && "Exit Block must be Fallthrough of the "
12039 "block containing memcpy/memset Pseudo");
12040 TpExit = BB->getFallThrough();
12041 BuildMI(BB, dl, TII->get(ARM::t2B))
12042 .addMBB(TpExit)
12043 .add(predOps(ARMCC::AL));
12044 TpExit = BB->splitAt(MI, false);
12045 }
12046
12047 // Add logic for iteration count
12048 Register TotalIterationsReg =
12049 genTPEntry(TpEntry, TpLoopBody, TpExit, OpSizeReg, TII, dl, MRI);
12050
12051 // Add the vectorized (and predicated) loads/store instructions
12052 bool IsMemcpy = MI.getOpcode() == ARM::MVE_MEMCPYLOOPINST;
12053 genTPLoopBody(TpLoopBody, TpEntry, TpExit, TII, dl, MRI, OpSrcReg,
12054 OpDestReg, OpSizeReg, TotalIterationsReg, IsMemcpy);
12055
12056 // Required to avoid conflict with the MachineVerifier during testing.
12057 Properties.reset(MachineFunctionProperties::Property::NoPHIs);
12058
12059 // Connect the blocks
12060 TpEntry->addSuccessor(TpLoopBody);
12061 TpLoopBody->addSuccessor(TpLoopBody);
12062 TpLoopBody->addSuccessor(TpExit);
12063
12064 // Reorder for a more natural layout
12065 TpLoopBody->moveAfter(TpEntry);
12066 TpExit->moveAfter(TpLoopBody);
12067
12068 // Finally, remove the memcpy Pseudo Instruction
12069 MI.eraseFromParent();
12070
12071 // Return the exit block as it may contain other instructions requiring a
12072 // custom inserter
12073 return TpExit;
12074 }
12075
12076 // The Thumb2 pre-indexed stores have the same MI operands, they just
12077 // define them differently in the .td files from the isel patterns, so
12078 // they need pseudos.
12079 case ARM::t2STR_preidx:
12080 MI.setDesc(TII->get(ARM::t2STR_PRE));
12081 return BB;
12082 case ARM::t2STRB_preidx:
12083 MI.setDesc(TII->get(ARM::t2STRB_PRE));
12084 return BB;
12085 case ARM::t2STRH_preidx:
12086 MI.setDesc(TII->get(ARM::t2STRH_PRE));
12087 return BB;
12088
12089 case ARM::STRi_preidx:
12090 case ARM::STRBi_preidx: {
12091 unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
12092 : ARM::STRB_PRE_IMM;
12093 // Decode the offset.
12094 unsigned Offset = MI.getOperand(4).getImm();
12095 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
12096 Offset = ARM_AM::getAM2Offset(Offset);
12097 if (isSub)
12098 Offset = -Offset;
12099
12100 MachineMemOperand *MMO = *MI.memoperands_begin();
12101 BuildMI(*BB, MI, dl, TII->get(NewOpc))
12102 .add(MI.getOperand(0)) // Rn_wb
12103 .add(MI.getOperand(1)) // Rt
12104 .add(MI.getOperand(2)) // Rn
12105 .addImm(Offset) // offset (skip GPR==zero_reg)
12106 .add(MI.getOperand(5)) // pred
12107 .add(MI.getOperand(6))
12108 .addMemOperand(MMO);
12109 MI.eraseFromParent();
12110 return BB;
12111 }
12112 case ARM::STRr_preidx:
12113 case ARM::STRBr_preidx:
12114 case ARM::STRH_preidx: {
12115 unsigned NewOpc;
12116 switch (MI.getOpcode()) {
12117 default: llvm_unreachable("unexpected opcode!");
12118 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
12119 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
12120 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
12121 }
12122 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
12123 for (const MachineOperand &MO : MI.operands())
12124 MIB.add(MO);
12125 MI.eraseFromParent();
12126 return BB;
12127 }
12128
12129 case ARM::tMOVCCr_pseudo: {
12130 // To "insert" a SELECT_CC instruction, we actually have to insert the
12131 // diamond control-flow pattern. The incoming instruction knows the
12132 // destination vreg to set, the condition code register to branch on, the
12133 // true/false values to select between, and a branch opcode to use.
12134 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12135 MachineFunction::iterator It = ++BB->getIterator();
12136
12137 // thisMBB:
12138 // ...
12139 // TrueVal = ...
12140 // cmpTY ccX, r1, r2
12141 // bCC copy1MBB
12142 // fallthrough --> copy0MBB
12143 MachineBasicBlock *thisMBB = BB;
12144 MachineFunction *F = BB->getParent();
12145 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12146 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12147 F->insert(It, copy0MBB);
12148 F->insert(It, sinkMBB);
12149
12150 // Set the call frame size on entry to the new basic blocks.
12151 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12152 copy0MBB->setCallFrameSize(CallFrameSize);
12153 sinkMBB->setCallFrameSize(CallFrameSize);
12154
12155 // Check whether CPSR is live past the tMOVCCr_pseudo.
12156 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
12157 if (!MI.killsRegister(ARM::CPSR, /*TRI=*/nullptr) &&
12158 !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
12159 copy0MBB->addLiveIn(ARM::CPSR);
12160 sinkMBB->addLiveIn(ARM::CPSR);
12161 }
12162
12163 // Transfer the remainder of BB and its successor edges to sinkMBB.
12164 sinkMBB->splice(sinkMBB->begin(), BB,
12165 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12166 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12167
12168 BB->addSuccessor(copy0MBB);
12169 BB->addSuccessor(sinkMBB);
12170
12171 BuildMI(BB, dl, TII->get(ARM::tBcc))
12172 .addMBB(sinkMBB)
12173 .addImm(MI.getOperand(3).getImm())
12174 .addReg(MI.getOperand(4).getReg());
12175
12176 // copy0MBB:
12177 // %FalseValue = ...
12178 // # fallthrough to sinkMBB
12179 BB = copy0MBB;
12180
12181 // Update machine-CFG edges
12182 BB->addSuccessor(sinkMBB);
12183
12184 // sinkMBB:
12185 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12186 // ...
12187 BB = sinkMBB;
12188 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
12189 .addReg(MI.getOperand(1).getReg())
12190 .addMBB(copy0MBB)
12191 .addReg(MI.getOperand(2).getReg())
12192 .addMBB(thisMBB);
12193
12194 MI.eraseFromParent(); // The pseudo instruction is gone now.
12195 return BB;
12196 }
12197
12198 case ARM::BCCi64:
12199 case ARM::BCCZi64: {
12200 // If there is an unconditional branch to the other successor, remove it.
12201 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
12202
12203 // Compare both parts that make up the double comparison separately for
12204 // equality.
12205 bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
12206
12207 Register LHS1 = MI.getOperand(1).getReg();
12208 Register LHS2 = MI.getOperand(2).getReg();
12209 if (RHSisZero) {
12210 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12211 .addReg(LHS1)
12212 .addImm(0)
12213 .add(predOps(ARMCC::AL));
12214 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12215 .addReg(LHS2).addImm(0)
12216 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12217 } else {
12218 Register RHS1 = MI.getOperand(3).getReg();
12219 Register RHS2 = MI.getOperand(4).getReg();
12220 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12221 .addReg(LHS1)
12222 .addReg(RHS1)
12223 .add(predOps(ARMCC::AL));
12224 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12225 .addReg(LHS2).addReg(RHS2)
12226 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12227 }
12228
12229 MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
12230 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
12231 if (MI.getOperand(0).getImm() == ARMCC::NE)
12232 std::swap(destMBB, exitMBB);
12233
12234 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
12235 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
12236 if (isThumb2)
12237 BuildMI(BB, dl, TII->get(ARM::t2B))
12238 .addMBB(exitMBB)
12239 .add(predOps(ARMCC::AL));
12240 else
12241 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
12242
12243 MI.eraseFromParent(); // The pseudo instruction is gone now.
12244 return BB;
12245 }
12246
12247 case ARM::Int_eh_sjlj_setjmp:
12248 case ARM::Int_eh_sjlj_setjmp_nofp:
12249 case ARM::tInt_eh_sjlj_setjmp:
12250 case ARM::t2Int_eh_sjlj_setjmp:
12251 case ARM::t2Int_eh_sjlj_setjmp_nofp:
12252 return BB;
12253
12254 case ARM::Int_eh_sjlj_setup_dispatch:
12255 EmitSjLjDispatchBlock(MI, BB);
12256 return BB;
12257
12258 case ARM::ABS:
12259 case ARM::t2ABS: {
12260 // To insert an ABS instruction, we have to insert the
12261 // diamond control-flow pattern. The incoming instruction knows the
12262 // source vreg to test against 0, the destination vreg to set,
12263 // the condition code register to branch on, the
12264 // true/false values to select between, and a branch opcode to use.
12265 // It transforms
12266 // V1 = ABS V0
12267 // into
12268 // V2 = MOVS V0
12269 // BCC (branch to SinkBB if V0 >= 0)
12270 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
12271 // SinkBB: V1 = PHI(V2, V3)
12272 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12273 MachineFunction::iterator BBI = ++BB->getIterator();
12274 MachineFunction *Fn = BB->getParent();
12275 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12276 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12277 Fn->insert(BBI, RSBBB);
12278 Fn->insert(BBI, SinkBB);
12279
12280 Register ABSSrcReg = MI.getOperand(1).getReg();
12281 Register ABSDstReg = MI.getOperand(0).getReg();
12282 bool ABSSrcKIll = MI.getOperand(1).isKill();
12283 bool isThumb2 = Subtarget->isThumb2();
12284 MachineRegisterInfo &MRI = Fn->getRegInfo();
12285 // In Thumb mode S must not be specified if source register is the SP or
12286 // PC and if destination register is the SP, so restrict register class
12287 Register NewRsbDstReg = MRI.createVirtualRegister(
12288 isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
12289
12290 // Transfer the remainder of BB and its successor edges to sinkMBB.
12291 SinkBB->splice(SinkBB->begin(), BB,
12292 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12293 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
12294
12295 BB->addSuccessor(RSBBB);
12296 BB->addSuccessor(SinkBB);
12297
12298 // fall through to SinkMBB
12299 RSBBB->addSuccessor(SinkBB);
12300
12301 // insert a cmp at the end of BB
12302 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12303 .addReg(ABSSrcReg)
12304 .addImm(0)
12305 .add(predOps(ARMCC::AL));
12306
12307 // insert a bcc with opposite CC to ARMCC::MI at the end of BB
12308 BuildMI(BB, dl,
12309 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
12310 .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
12311
12312 // insert rsbri in RSBBB
12313 // Note: BCC and rsbri will be converted into predicated rsbmi
12314 // by if-conversion pass
12315 BuildMI(*RSBBB, RSBBB->begin(), dl,
12316 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
12317 .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
12318 .addImm(0)
12319 .add(predOps(ARMCC::AL))
12320 .add(condCodeOp());
12321
12322 // insert PHI in SinkBB,
12323 // reuse ABSDstReg to not change uses of ABS instruction
12324 BuildMI(*SinkBB, SinkBB->begin(), dl,
12325 TII->get(ARM::PHI), ABSDstReg)
12326 .addReg(NewRsbDstReg).addMBB(RSBBB)
12327 .addReg(ABSSrcReg).addMBB(BB);
12328
12329 // remove ABS instruction
12330 MI.eraseFromParent();
12331
12332 // return last added BB
12333 return SinkBB;
12334 }
12335 case ARM::COPY_STRUCT_BYVAL_I32:
12336 ++NumLoopByVals;
12337 return EmitStructByval(MI, BB);
12338 case ARM::WIN__CHKSTK:
12339 return EmitLowered__chkstk(MI, BB);
12340 case ARM::WIN__DBZCHK:
12341 return EmitLowered__dbzchk(MI, BB);
12342 }
12343}
12344
12345/// Attaches vregs to MEMCPY that it will use as scratch registers
12346/// when it is expanded into LDM/STM. This is done as a post-isel lowering
12347/// instead of as a custom inserter because we need the use list from the SDNode.
12348static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
12349 MachineInstr &MI, const SDNode *Node) {
12350 bool isThumb1 = Subtarget->isThumb1Only();
12351
12352 DebugLoc DL = MI.getDebugLoc();
12353 MachineFunction *MF = MI.getParent()->getParent();
12354 MachineRegisterInfo &MRI = MF->getRegInfo();
12355 MachineInstrBuilder MIB(*MF, MI);
12356
12357 // If the new dst/src is unused mark it as dead.
12358 if (!Node->hasAnyUseOfValue(0)) {
12359 MI.getOperand(0).setIsDead(true);
12360 }
12361 if (!Node->hasAnyUseOfValue(1)) {
12362 MI.getOperand(1).setIsDead(true);
12363 }
12364
12365 // The MEMCPY both defines and kills the scratch registers.
12366 for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
12367 Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
12368 : &ARM::GPRRegClass);
12369 MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
12370 }
12371}
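// Illustrative sketch (register choice assumed): with four scratch vregs
// attached, a MEMCPY pseudo can later be expanded into an LDM/STM block copy
// of the shape
//   ldm r1!, {r4, r5, r6, r7}
//   stm r0!, {r4, r5, r6, r7}
// which is why each scratch register is added as a dead def here.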
12372
12373 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
12374 SDNode *Node) const {
12375 if (MI.getOpcode() == ARM::MEMCPY) {
12376 attachMEMCPYScratchRegs(Subtarget, MI, Node);
12377 return;
12378 }
12379
12380 const MCInstrDesc *MCID = &MI.getDesc();
12381 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
12382 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
12383 // operand is still set to noreg. If needed, set the optional operand's
12384 // register to CPSR, and remove the redundant implicit def.
12385 //
12386 // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
12387
12388 // Rename pseudo opcodes.
12389 unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
12390 unsigned ccOutIdx;
12391 if (NewOpc) {
12392 const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
12393 MCID = &TII->get(NewOpc);
12394
12395 assert(MCID->getNumOperands() ==
12396 MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
12397 && "converted opcode should be the same except for cc_out"
12398 " (and, on Thumb1, pred)");
12399
12400 MI.setDesc(*MCID);
12401
12402 // Add the optional cc_out operand
12403 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
12404
12405 // On Thumb1, move all input operands to the end, then add the predicate
12406 if (Subtarget->isThumb1Only()) {
12407 for (unsigned c = MCID->getNumOperands() - 4; c--;) {
12408 MI.addOperand(MI.getOperand(1));
12409 MI.removeOperand(1);
12410 }
12411
12412 // Restore the ties
12413 for (unsigned i = MI.getNumOperands(); i--;) {
12414 const MachineOperand& op = MI.getOperand(i);
12415 if (op.isReg() && op.isUse()) {
12416 int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
12417 if (DefIdx != -1)
12418 MI.tieOperands(DefIdx, i);
12419 }
12420 }
12421
12422 MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
12423 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
12424 ccOutIdx = 1;
12425 } else
12426 ccOutIdx = MCID->getNumOperands() - 1;
12427 } else
12428 ccOutIdx = MCID->getNumOperands() - 1;
12429
12430 // Any ARM instruction that sets the 's' bit should specify an optional
12431 // "cc_out" operand in the last operand position.
12432 if (!MI.hasOptionalDef() || !MCID->operands()[ccOutIdx].isOptionalDef()) {
12433 assert(!NewOpc && "Optional cc_out operand required");
12434 return;
12435 }
12436 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
12437 // since we already have an optional CPSR def.
12438 bool definesCPSR = false;
12439 bool deadCPSR = false;
12440 for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
12441 ++i) {
12442 const MachineOperand &MO = MI.getOperand(i);
12443 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
12444 definesCPSR = true;
12445 if (MO.isDead())
12446 deadCPSR = true;
12447 MI.removeOperand(i);
12448 break;
12449 }
12450 }
12451 if (!definesCPSR) {
12452 assert(!NewOpc && "Optional cc_out operand required");
12453 return;
12454 }
12455 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
12456 if (deadCPSR) {
12457 assert(!MI.getOperand(ccOutIdx).getReg() &&
12458 "expect uninitialized optional cc_out operand");
12459 // Thumb1 instructions must have the S bit even if the CPSR is dead.
12460 if (!Subtarget->isThumb1Only())
12461 return;
12462 }
12463
12464 // If this instruction was defined with an optional CPSR def and its dag node
12465 // had a live implicit CPSR def, then activate the optional CPSR def.
12466 MachineOperand &MO = MI.getOperand(ccOutIdx);
12467 MO.setReg(ARM::CPSR);
12468 MO.setIsDef(true);
12469}
12470
12471//===----------------------------------------------------------------------===//
12472// ARM Optimization Hooks
12473//===----------------------------------------------------------------------===//
12474
12475// Helper function that checks if N is a null or all ones constant.
12476 static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
12477 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
12478}
12479
12480// Return true if N is conditionally 0 or all ones.
12481// Detects these expressions where cc is an i1 value:
12482//
12483// (select cc 0, y) [AllOnes=0]
12484// (select cc y, 0) [AllOnes=0]
12485// (zext cc) [AllOnes=0]
12486// (sext cc) [AllOnes=0/1]
12487// (select cc -1, y) [AllOnes=1]
12488// (select cc y, -1) [AllOnes=1]
12489//
12490// Invert is set when N is the null/all ones constant when CC is false.
12491// OtherOp is set to the alternative value of N.
12492 static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
12493 SDValue &CC, bool &Invert,
12494 SDValue &OtherOp,
12495 SelectionDAG &DAG) {
12496 switch (N->getOpcode()) {
12497 default: return false;
12498 case ISD::SELECT: {
12499 CC = N->getOperand(0);
12500 SDValue N1 = N->getOperand(1);
12501 SDValue N2 = N->getOperand(2);
12502 if (isZeroOrAllOnes(N1, AllOnes)) {
12503 Invert = false;
12504 OtherOp = N2;
12505 return true;
12506 }
12507 if (isZeroOrAllOnes(N2, AllOnes)) {
12508 Invert = true;
12509 OtherOp = N1;
12510 return true;
12511 }
12512 return false;
12513 }
12514 case ISD::ZERO_EXTEND:
12515 // (zext cc) can never be the all ones value.
12516 if (AllOnes)
12517 return false;
12518 [[fallthrough]];
12519 case ISD::SIGN_EXTEND: {
12520 SDLoc dl(N);
12521 EVT VT = N->getValueType(0);
12522 CC = N->getOperand(0);
12523 if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
12524 return false;
12525 Invert = !AllOnes;
12526 if (AllOnes)
12527 // When looking for an AllOnes constant, N is an sext, and the 'other'
12528 // value is 0.
12529 OtherOp = DAG.getConstant(0, dl, VT);
12530 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12531 // When looking for a 0 constant, N can be zext or sext.
12532 OtherOp = DAG.getConstant(1, dl, VT);
12533 else
12534 OtherOp = DAG.getAllOnesConstant(dl, VT);
12535 return true;
12536 }
12537 }
12538}
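// Illustrative examples (SDValue names assumed): for N = (select %cc, 0, %c)
// with AllOnes == false the helper returns true with CC = %cc, Invert = false
// and OtherOp = %c; for N = (zext %cc), %cc an i1 setcc, it returns true with
// Invert = true and OtherOp = 1, since the zext is 0 exactly when %cc is false.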
12539
12540// Combine a constant select operand into its use:
12541//
12542// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
12543// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
12544// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
12545// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
12546// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
12547//
12548// The transform is rejected if the select doesn't have a constant operand that
12549// is null, or all ones when AllOnes is set.
12550//
12551// Also recognize sext/zext from i1:
12552//
12553// (add (zext cc), x) -> (select cc (add x, 1), x)
12554// (add (sext cc), x) -> (select cc (add x, -1), x)
12555//
12556// These transformations eventually create predicated instructions.
12557//
12558// @param N The node to transform.
12559// @param Slct The N operand that is a select.
12560// @param OtherOp The other N operand (x above).
12561// @param DCI Context.
12562// @param AllOnes Require the select constant to be all ones instead of null.
12563// @returns The new node, or SDValue() on failure.
12564static
12565 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12566 TargetLowering::DAGCombinerInfo &DCI,
12567 bool AllOnes = false) {
12568 SelectionDAG &DAG = DCI.DAG;
12569 EVT VT = N->getValueType(0);
12570 SDValue NonConstantVal;
12571 SDValue CCOp;
12572 bool SwapSelectOps;
12573 if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
12574 NonConstantVal, DAG))
12575 return SDValue();
12576
12577 // Slct is now known to be the desired identity constant when CC is true.
12578 SDValue TrueVal = OtherOp;
12579 SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
12580 OtherOp, NonConstantVal);
12581 // Unless SwapSelectOps says CC should be false.
12582 if (SwapSelectOps)
12583 std::swap(TrueVal, FalseVal);
12584
12585 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12586 CCOp, TrueVal, FalseVal);
12587}
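// Concrete instance of the transforms listed above (constant chosen for
// illustration):
//   (add (select %cc, 0, 7), %x)  -->  (select %cc, %x, (add %x, 7))
// so the add is only materialised on the path where the select operand is not
// the identity value 0; later lowering can turn this into a predicated add.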
12588
12589// Attempt combineSelectAndUse on each operand of a commutative operator N.
12590static
12591 SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
12592 TargetLowering::DAGCombinerInfo &DCI) {
12593 SDValue N0 = N->getOperand(0);
12594 SDValue N1 = N->getOperand(1);
12595 if (N0.getNode()->hasOneUse())
12596 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
12597 return Result;
12598 if (N1.getNode()->hasOneUse())
12599 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
12600 return Result;
12601 return SDValue();
12602}
12603
12604 static bool IsVUZPShuffleNode(SDNode *N) {
12605 // VUZP shuffle node.
12606 if (N->getOpcode() == ARMISD::VUZP)
12607 return true;
12608
12609 // "VUZP" on i32 is an alias for VTRN.
12610 if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
12611 return true;
12612
12613 return false;
12614}
12615
12616 static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
12617 TargetLowering::DAGCombinerInfo &DCI,
12618 const ARMSubtarget *Subtarget) {
12619 // Look for ADD(VUZP.0, VUZP.1).
12620 if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
12621 N0 == N1)
12622 return SDValue();
12623
12624 // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
12625 if (!N->getValueType(0).is64BitVector())
12626 return SDValue();
12627
12628 // Generate vpadd.
12629 SelectionDAG &DAG = DCI.DAG;
12630 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12631 SDLoc dl(N);
12632 SDNode *Unzip = N0.getNode();
12633 EVT VT = N->getValueType(0);
12634
12635 SmallVector<SDValue, 8> Ops;
12636 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
12637 TLI.getPointerTy(DAG.getDataLayout())));
12638 Ops.push_back(Unzip->getOperand(0));
12639 Ops.push_back(Unzip->getOperand(1));
12640
12641 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12642}
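// Illustrative DAG example (types assumed): for a, b : v8i8,
//   add (vuzp.0 a, b), (vuzp.1 a, b)
// adds every even lane of a:b to the neighbouring odd lane, which is exactly
// vpadd.i8 a, b, so the combine above rewrites it to the arm.neon.vpadd
// intrinsic on the two VUZP inputs.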
12643
12644 static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
12645 TargetLowering::DAGCombinerInfo &DCI,
12646 const ARMSubtarget *Subtarget) {
12647 // Check for two extended operands.
12648 if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
12649 N1.getOpcode() == ISD::SIGN_EXTEND) &&
12650 !(N0.getOpcode() == ISD::ZERO_EXTEND &&
12651 N1.getOpcode() == ISD::ZERO_EXTEND))
12652 return SDValue();
12653
12654 SDValue N00 = N0.getOperand(0);
12655 SDValue N10 = N1.getOperand(0);
12656
12657 // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
12658 if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
12659 N00 == N10)
12660 return SDValue();
12661
12662 // We only recognize Q register paddl here; this can't be reached until
12663 // after type legalization.
12664 if (!N00.getValueType().is64BitVector() ||
12665 !N0.getValueType().is128BitVector())
12666 return SDValue();
12667
12668 // Generate vpaddl.
12669 SelectionDAG &DAG = DCI.DAG;
12670 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12671 SDLoc dl(N);
12672 EVT VT = N->getValueType(0);
12673
12674 SmallVector<SDValue, 8> Ops;
12675 // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
12676 unsigned Opcode;
12677 if (N0.getOpcode() == ISD::SIGN_EXTEND)
12678 Opcode = Intrinsic::arm_neon_vpaddls;
12679 else
12680 Opcode = Intrinsic::arm_neon_vpaddlu;
12681 Ops.push_back(DAG.getConstant(Opcode, dl,
12682 TLI.getPointerTy(DAG.getDataLayout())));
12683 EVT ElemTy = N00.getValueType().getVectorElementType();
12684 unsigned NumElts = VT.getVectorNumElements();
12685 EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
12686 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
12687 N00.getOperand(0), N00.getOperand(1));
12688 Ops.push_back(Concat);
12689
12690 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12691}
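// Illustrative DAG example (types assumed): for a, b : v8i8,
//   add (sext (vuzp.0 a, b) to v8i16), (sext (vuzp.1 a, b) to v8i16)
// is the pairwise widening sum of the 16 bytes of a:b, so the combine above
// rewrites it to arm.neon.vpaddls (vpaddlu for the zext form) applied to
// concat_vectors(a, b).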
12692
12693// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
12694// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
12695// much easier to match.
12696static SDValue
12697 AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
12698 TargetLowering::DAGCombinerInfo &DCI,
12699 const ARMSubtarget *Subtarget) {
12700 // Only perform optimization if after legalize, and if NEON is available. We
12701 // also expect both operands to be BUILD_VECTORs.
12702 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
12703 || N0.getOpcode() != ISD::BUILD_VECTOR
12704 || N1.getOpcode() != ISD::BUILD_VECTOR)
12705 return SDValue();
12706
12707 // Check output type since VPADDL operand elements can only be 8, 16, or 32.
12708 EVT VT = N->getValueType(0);
12709 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
12710 return SDValue();
12711
12712 // Check that the vector operands are of the right form.
12713 // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
12714 // operands, where N is the size of the formed vector.
12715 // Each EXTRACT_VECTOR should have the same input vector and odd or even
12716 // index such that we have a pairwise add pattern.
12717
12718 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
12719 if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12720 return SDValue();
12721 SDValue Vec = N0->getOperand(0)->getOperand(0);
12722 SDNode *V = Vec.getNode();
12723 unsigned nextIndex = 0;
12724
12725 // For each operands to the ADD which are BUILD_VECTORs,
12726 // check to see if each of their operands are an EXTRACT_VECTOR with
12727 // the same vector and appropriate index.
12728 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
12729 if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
12730 && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
12731
12732 SDValue ExtVec0 = N0->getOperand(i);
12733 SDValue ExtVec1 = N1->getOperand(i);
12734
12735 // First operand is the vector, verify its the same.
12736 if (V != ExtVec0->getOperand(0).getNode() ||
12737 V != ExtVec1->getOperand(0).getNode())
12738 return SDValue();
12739
12740 // Second is the constant, verify its correct.
12741 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
12742 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
12743
12744 // For the constant, we want to see all the even or all the odd.
12745 if (!C0 || !C1 || C0->getZExtValue() != nextIndex
12746 || C1->getZExtValue() != nextIndex+1)
12747 return SDValue();
12748
12749 // Increment index.
12750 nextIndex+=2;
12751 } else
12752 return SDValue();
12753 }
12754
12755 // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
12756 // we're using the entire input vector, otherwise there's a size/legality
12757 // mismatch somewhere.
12758 if (nextIndex != Vec.getValueType().getVectorNumElements() ||
12759 !N0.getValueType().is64BitVector())
12760 return SDValue();
12761
12762 // Create VPADDL node.
12763 SelectionDAG &DAG = DCI.DAG;
12764 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12765
12766 SDLoc dl(N);
12767
12768 // Build operand list.
12769 SmallVector<SDValue, 8> Ops;
12770 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
12771 TLI.getPointerTy(DAG.getDataLayout())));
12772
12773 // Input is the vector.
12774 Ops.push_back(Vec);
12775
12776 // Get widened type and narrowed type.
12777 MVT widenType;
12778 unsigned numElem = VT.getVectorNumElements();
12779
12780 EVT inputLaneType = Vec.getValueType().getVectorElementType();
12781 switch (inputLaneType.getSimpleVT().SimpleTy) {
12782 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
12783 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
12784 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
12785 default:
12786 llvm_unreachable("Invalid vector element type for padd optimization.");
12787 }
12788
12789 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
12790 unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
12791 return DAG.getNode(ExtOp, dl, VT, tmp);
12792}
12793
12794 static SDValue findMUL_LOHI(SDValue V) {
12795 if (V->getOpcode() == ISD::UMUL_LOHI ||
12796 V->getOpcode() == ISD::SMUL_LOHI)
12797 return V;
12798 return SDValue();
12799}
12800
12801static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
12802 TargetLowering::DAGCombinerInfo &DCI,
12803 const ARMSubtarget *Subtarget) {
12804 if (!Subtarget->hasBaseDSP())
12805 return SDValue();
12806
12807 // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
12808 // accumulates the product into a 64-bit value. The 16-bit values will
12809 // be sign extended somehow or SRA'd into 32-bit values
12810 // (addc (adde (mul 16bit, 16bit), lo), hi)
12811 SDValue Mul = AddcNode->getOperand(0);
12812 SDValue Lo = AddcNode->getOperand(1);
12813 if (Mul.getOpcode() != ISD::MUL) {
12814 Lo = AddcNode->getOperand(0);
12815 Mul = AddcNode->getOperand(1);
12816 if (Mul.getOpcode() != ISD::MUL)
12817 return SDValue();
12818 }
12819
12820 SDValue SRA = AddeNode->getOperand(0);
12821 SDValue Hi = AddeNode->getOperand(1);
12822 if (SRA.getOpcode() != ISD::SRA) {
12823 SRA = AddeNode->getOperand(1);
12824 Hi = AddeNode->getOperand(0);
12825 if (SRA.getOpcode() != ISD::SRA)
12826 return SDValue();
12827 }
12828 if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
12829 if (Const->getZExtValue() != 31)
12830 return SDValue();
12831 } else
12832 return SDValue();
12833
12834 if (SRA.getOperand(0) != Mul)
12835 return SDValue();
12836
12837 SelectionDAG &DAG = DCI.DAG;
12838 SDLoc dl(AddcNode);
12839 unsigned Opcode = 0;
12840 SDValue Op0;
12841 SDValue Op1;
12842
12843 if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
12844 Opcode = ARMISD::SMLALBB;
12845 Op0 = Mul.getOperand(0);
12846 Op1 = Mul.getOperand(1);
12847 } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
12848 Opcode = ARMISD::SMLALBT;
12849 Op0 = Mul.getOperand(0);
12850 Op1 = Mul.getOperand(1).getOperand(0);
12851 } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
12852 Opcode = ARMISD::SMLALTB;
12853 Op0 = Mul.getOperand(0).getOperand(0);
12854 Op1 = Mul.getOperand(1);
12855 } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
12856 Opcode = ARMISD::SMLALTT;
12857 Op0 = Mul->getOperand(0).getOperand(0);
12858 Op1 = Mul->getOperand(1).getOperand(0);
12859 }
12860
12861 if (!Op0 || !Op1)
12862 return SDValue();
12863
12864 SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
12865 Op0, Op1, Lo, Hi);
12866 // Replace the ADDs' nodes uses by the MLA node's values.
12867 SDValue HiMLALResult(SMLAL.getNode(), 1);
12868 SDValue LoMLALResult(SMLAL.getNode(), 0);
12869
12870 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
12871 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
12872
12873 // Return original node to notify the driver to stop replacing.
12874 SDValue resNode(AddcNode, 0);
12875 return resNode;
12876}
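// Illustrative source pattern (variable names assumed): a 64-bit accumulation
// such as
//   acc += (int64_t)((int16_t)a * (int16_t)b);
// arrives here as ADDC/ADDE around a 32-bit multiply of sign-extended halves
// and is folded into a single ARMISD::SMLALBB (or BT/TB/TT) node.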
12877
12878 static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,
12879 TargetLowering::DAGCombinerInfo &DCI,
12880 const ARMSubtarget *Subtarget) {
12881 // Look for multiply add opportunities.
12882 // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
12883 // each add node consumes a value from ISD::UMUL_LOHI and there is
12884 // a glue link from the first add to the second add.
12885 // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
12886 // a S/UMLAL instruction.
12887 // UMUL_LOHI
12888 // / :lo \ :hi
12889 // V \ [no multiline comment]
12890 // loAdd -> ADDC |
12891 // \ :carry /
12892 // V V
12893 // ADDE <- hiAdd
12894 //
12895 // In the special case where only the higher part of a signed result is used
12896 // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
12897 // a constant with the exact value of 0x80000000, we recognize we are dealing
12898 // with a "rounded multiply and add" (or subtract) and transform it into
12899 // either a ARMISD::SMMLAR or ARMISD::SMMLSR respectively.
12900
12901 assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
12902 AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
12903 "Expect an ADDE or SUBE");
12904
12905 assert(AddeSubeNode->getNumOperands() == 3 &&
12906 AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
12907 "ADDE node has the wrong inputs");
12908
12909 // Check that we are chained to the right ADDC or SUBC node.
12910 SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
12911 if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12912 AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
12913 (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
12914 AddcSubcNode->getOpcode() != ARMISD::SUBC))
12915 return SDValue();
12916
12917 SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
12918 SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
12919
12920 // Check if the two operands are from the same mul_lohi node.
12921 if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
12922 return SDValue();
12923
12924 assert(AddcSubcNode->getNumValues() == 2 &&
12925 AddcSubcNode->getValueType(0) == MVT::i32 &&
12926 "Expect ADDC with two result values. First: i32");
12927
12928 // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
12929 // may be an SMLAL which multiplies two 16-bit values.
12930 if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12931 AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
12932 AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
12933 AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
12934 AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
12935 return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
12936
12937 // Check for the triangle shape.
12938 SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
12939 SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
12940
12941 // Make sure that the ADDE/SUBE operands are not coming from the same node.
12942 if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
12943 return SDValue();
12944
12945 // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
12946 bool IsLeftOperandMUL = false;
12947 SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
12948 if (MULOp == SDValue())
12949 MULOp = findMUL_LOHI(AddeSubeOp1);
12950 else
12951 IsLeftOperandMUL = true;
12952 if (MULOp == SDValue())
12953 return SDValue();
12954
12955 // Figure out the right opcode.
12956 unsigned Opc = MULOp->getOpcode();
12957 unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
12958
12959 // Figure out the high and low input values to the MLAL node.
12960 SDValue *HiAddSub = nullptr;
12961 SDValue *LoMul = nullptr;
12962 SDValue *LowAddSub = nullptr;
12963
12964 // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
12965 if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
12966 return SDValue();
12967
12968 if (IsLeftOperandMUL)
12969 HiAddSub = &AddeSubeOp1;
12970 else
12971 HiAddSub = &AddeSubeOp0;
12972
12973 // Ensure that LoMul and LowAddSub are taken from correct ISD::SMUL_LOHI node
12974 // whose low result is fed to the ADDC/SUBC we are checking.
12975
12976 if (AddcSubcOp0 == MULOp.getValue(0)) {
12977 LoMul = &AddcSubcOp0;
12978 LowAddSub = &AddcSubcOp1;
12979 }
12980 if (AddcSubcOp1 == MULOp.getValue(0)) {
12981 LoMul = &AddcSubcOp1;
12982 LowAddSub = &AddcSubcOp0;
12983 }
12984
12985 if (!LoMul)
12986 return SDValue();
12987
12988 // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
12989 // the replacement below will create a cycle.
12990 if (AddcSubcNode == HiAddSub->getNode() ||
12991 AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
12992 return SDValue();
12993
12994 // Create the merged node.
12995 SelectionDAG &DAG = DCI.DAG;
12996
12997 // Start building operand list.
12998 SmallVector<SDValue, 8> Ops;
12999 Ops.push_back(LoMul->getOperand(0));
13000 Ops.push_back(LoMul->getOperand(1));
13001
13002 // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
13003 // the case, we must be doing signed multiplication and only use the higher
13004 // part of the result of the MLAL, furthermore the LowAddSub must be a constant
13005 // addition or subtraction with the value of 0x80000000.
13006 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
13007 FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
13008 LowAddSub->getNode()->getOpcode() == ISD::Constant &&
13009 static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
13010 0x80000000) {
13011 Ops.push_back(*HiAddSub);
13012 if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
13013 FinalOpc = ARMISD::SMMLSR;
13014 } else {
13015 FinalOpc = ARMISD::SMMLAR;
13016 }
13017 SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
13018 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
13019
13020 return SDValue(AddeSubeNode, 0);
13021 } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
13022 // SMMLS is generated during instruction selection and the rest of this
13023 // function can not handle the case where AddcSubcNode is a SUBC.
13024 return SDValue();
13025
13026 // Finish building the operand list for {U/S}MLAL
13027 Ops.push_back(*LowAddSub);
13028 Ops.push_back(*HiAddSub);
13029
13030 SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
13031 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13032
13033 // Replace the ADDs' nodes uses by the MLA node's values.
13034 SDValue HiMLALResult(MLALNode.getNode(), 1);
13035 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
13036
13037 SDValue LoMLALResult(MLALNode.getNode(), 0);
13038 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
13039
13040 // Return original node to notify the driver to stop replacing.
13041 return SDValue(AddeSubeNode, 0);
13042}
13043
13044 static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
13045 TargetLowering::DAGCombinerInfo &DCI,
13046 const ARMSubtarget *Subtarget) {
13047 // UMAAL is similar to UMLAL except that it adds two unsigned values.
13048 // While trying to combine for the other MLAL nodes, first search for the
13049 // chance to use UMAAL. Check if Addc uses a node which has already
13050 // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
13051 // as the addend, and it's handled in PerformUMLALCombine.
13052
13053 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13054 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13055
13056 // Check that we have a glued ADDC node.
13057 SDNode* AddcNode = AddeNode->getOperand(2).getNode();
13058 if (AddcNode->getOpcode() != ARMISD::ADDC)
13059 return SDValue();
13060
13061 // Find the converted UMAAL or quit if it doesn't exist.
13062 SDNode *UmlalNode = nullptr;
13063 SDValue AddHi;
13064 if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
13065 UmlalNode = AddcNode->getOperand(0).getNode();
13066 AddHi = AddcNode->getOperand(1);
13067 } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
13068 UmlalNode = AddcNode->getOperand(1).getNode();
13069 AddHi = AddcNode->getOperand(0);
13070 } else {
13071 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13072 }
13073
13074 // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
13075 // the ADDC as well as Zero.
13076 if (!isNullConstant(UmlalNode->getOperand(3)))
13077 return SDValue();
13078
13079 if ((isNullConstant(AddeNode->getOperand(0)) &&
13080 AddeNode->getOperand(1).getNode() == UmlalNode) ||
13081 (AddeNode->getOperand(0).getNode() == UmlalNode &&
13082 isNullConstant(AddeNode->getOperand(1)))) {
13083 SelectionDAG &DAG = DCI.DAG;
13084 SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
13085 UmlalNode->getOperand(2), AddHi };
13086 SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
13087 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13088
13089 // Replace the ADDs' nodes uses by the UMAAL node's values.
13090 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
13091 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
13092
13093 // Return original node to notify the driver to stop replacing.
13094 return SDValue(AddeNode, 0);
13095 }
13096 return SDValue();
13097}
13098
13099 static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
13100 const ARMSubtarget *Subtarget) {
13101 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13102 return SDValue();
13103
13104 // Check that we have a pair of ADDC and ADDE as operands.
13105 // Both addends of the ADDE must be zero.
13106 SDNode* AddcNode = N->getOperand(2).getNode();
13107 SDNode* AddeNode = N->getOperand(3).getNode();
13108 if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
13109 (AddeNode->getOpcode() == ARMISD::ADDE) &&
13110 isNullConstant(AddeNode->getOperand(0)) &&
13111 isNullConstant(AddeNode->getOperand(1)) &&
13112 (AddeNode->getOperand(2).getNode() == AddcNode))
13113 return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
13114 DAG.getVTList(MVT::i32, MVT::i32),
13115 {N->getOperand(0), N->getOperand(1),
13116 AddcNode->getOperand(0), AddcNode->getOperand(1)});
13117 else
13118 return SDValue();
13119}
13120
13121 static SDValue PerformAddcSubcCombine(SDNode *N,
13122 TargetLowering::DAGCombinerInfo &DCI,
13123 const ARMSubtarget *Subtarget) {
13124 SelectionDAG &DAG(DCI.DAG);
13125
13126 if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
13127 // (SUBC (ADDE 0, 0, C), 1) -> C
13128 SDValue LHS = N->getOperand(0);
13129 SDValue RHS = N->getOperand(1);
13130 if (LHS->getOpcode() == ARMISD::ADDE &&
13131 isNullConstant(LHS->getOperand(0)) &&
13132 isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
13133 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
13134 }
13135 }
13136
13137 if (Subtarget->isThumb1Only()) {
13138 SDValue RHS = N->getOperand(1);
13139 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13140 int32_t imm = C->getSExtValue();
13141 if (imm < 0 && imm > std::numeric_limits<int>::min()) {
13142 SDLoc DL(N);
13143 RHS = DAG.getConstant(-imm, DL, MVT::i32);
13144 unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
13145 : ARMISD::ADDC;
13146 return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
13147 }
13148 }
13149 }
13150
13151 return SDValue();
13152}
13153
13154 static SDValue PerformAddeSubeCombine(SDNode *N,
13155 TargetLowering::DAGCombinerInfo &DCI,
13156 const ARMSubtarget *Subtarget) {
13157 if (Subtarget->isThumb1Only()) {
13158 SelectionDAG &DAG = DCI.DAG;
13159 SDValue RHS = N->getOperand(1);
13160 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13161 int64_t imm = C->getSExtValue();
13162 if (imm < 0) {
13163 SDLoc DL(N);
13164
13165 // The with-carry-in form matches bitwise not instead of the negation.
13166 // Effectively, the inverse interpretation of the carry flag already
13167 // accounts for part of the negation.
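// For example, ADDE(x, -5, carry) can become SUBE(x, 4, carry), since
// x + (-5) + carry == x - 4 - 1 + carry and ~(-5) == 4.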
13168 RHS = DAG.getConstant(~imm, DL, MVT::i32);
13169
13170 unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
13171 : ARMISD::ADDE;
13172 return DAG.getNode(Opcode, DL, N->getVTList(),
13173 N->getOperand(0), RHS, N->getOperand(2));
13174 }
13175 }
13176 } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
13177 return AddCombineTo64bitMLAL(N, DCI, Subtarget);
13178 }
13179 return SDValue();
13180}
13181
13182 static SDValue PerformSELECTCombine(SDNode *N,
13183 TargetLowering::DAGCombinerInfo &DCI,
13184 const ARMSubtarget *Subtarget) {
13185 if (!Subtarget->hasMVEIntegerOps())
13186 return SDValue();
13187
13188 SDLoc dl(N);
13189 SDValue SetCC;
13190 SDValue LHS;
13191 SDValue RHS;
13192 ISD::CondCode CC;
13193 SDValue TrueVal;
13194 SDValue FalseVal;
13195
13196 if (N->getOpcode() == ISD::SELECT &&
13197 N->getOperand(0)->getOpcode() == ISD::SETCC) {
13198 SetCC = N->getOperand(0);
13199 LHS = SetCC->getOperand(0);
13200 RHS = SetCC->getOperand(1);
13201 CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
13202 TrueVal = N->getOperand(1);
13203 FalseVal = N->getOperand(2);
13204 } else if (N->getOpcode() == ISD::SELECT_CC) {
13205 LHS = N->getOperand(0);
13206 RHS = N->getOperand(1);
13207 CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
13208 TrueVal = N->getOperand(2);
13209 FalseVal = N->getOperand(3);
13210 } else {
13211 return SDValue();
13212 }
13213
13214 unsigned int Opcode = 0;
13215 if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
13216 FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
13217 (CC == ISD::SETULT || CC == ISD::SETUGT)) {
13218 Opcode = ARMISD::VMINVu;
13219 if (CC == ISD::SETUGT)
13220 std::swap(TrueVal, FalseVal);
13221 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
13222 FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
13223 (CC == ISD::SETLT || CC == ISD::SETGT)) {
13224 Opcode = ARMISD::VMINVs;
13225 if (CC == ISD::SETGT)
13226 std::swap(TrueVal, FalseVal);
13227 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
13228 FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
13229 (CC == ISD::SETUGT || CC == ISD::SETULT)) {
13230 Opcode = ARMISD::VMAXVu;
13231 if (CC == ISD::SETULT)
13232 std::swap(TrueVal, FalseVal);
13233 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
13234 FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
13235 (CC == ISD::SETGT || CC == ISD::SETLT)) {
13236 Opcode = ARMISD::VMAXVs;
13237 if (CC == ISD::SETLT)
13238 std::swap(TrueVal, FalseVal);
13239 } else
13240 return SDValue();
13241
13242 // Normalise to the right hand side being the vector reduction
13243 switch (TrueVal->getOpcode()) {
13244 case ISD::VECREDUCE_UMIN:
13245 case ISD::VECREDUCE_SMIN:
13246 case ISD::VECREDUCE_UMAX:
13247 case ISD::VECREDUCE_SMAX:
13248 std::swap(LHS, RHS);
13249 std::swap(TrueVal, FalseVal);
13250 break;
13251 }
13252
13253 EVT VectorType = FalseVal->getOperand(0).getValueType();
13254
13255 if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
13256 VectorType != MVT::v4i32)
13257 return SDValue();
13258
13259 EVT VectorScalarType = VectorType.getVectorElementType();
13260
13261 // The values being selected must also be the ones being compared
13262 if (TrueVal != LHS || FalseVal != RHS)
13263 return SDValue();
13264
13265 EVT LeftType = LHS->getValueType(0);
13266 EVT RightType = RHS->getValueType(0);
13267
13268 // The types must match the reduced type too
13269 if (LeftType != VectorScalarType || RightType != VectorScalarType)
13270 return SDValue();
13271
13272 // Legalise the scalar to an i32
13273 if (VectorScalarType != MVT::i32)
13274 LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
13275
13276 // Generate the reduction as an i32 for legalisation purposes
13277 auto Reduction =
13278 DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
13279
13280 // The result isn't actually an i32 so truncate it back to its original type
13281 if (VectorScalarType != MVT::i32)
13282 Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);
13283
13284 return Reduction;
13285}
13286
13287// A special combine for the vqdmulh family of instructions. This is one of the
13288 // potential set of patterns that could match this instruction. The base pattern
13289 // you would expect is min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).
13290// This matches the different min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),
13291// which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15))) as
13292// the max is unnecessary.
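// For example, for i16 lanes vqdmulh(x, y) == sat16((x * y) >> 15), so a
// multiply of sign-extended i16 values, an ashr by 15 and an smin clamp at
// 32767 can be replaced by a single VQDMULH.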
13293 static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
13294 EVT VT = N->getValueType(0);
13295 SDValue Shft;
13296 ConstantSDNode *Clamp;
13297
13298 if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
13299 return SDValue();
13300
13301 if (N->getOpcode() == ISD::SMIN) {
13302 Shft = N->getOperand(0);
13303 Clamp = isConstOrConstSplat(N->getOperand(1));
13304 } else if (N->getOpcode() == ISD::VSELECT) {
13305 // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.
13306 SDValue Cmp = N->getOperand(0);
13307 if (Cmp.getOpcode() != ISD::SETCC ||
13308 cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
13309 Cmp.getOperand(0) != N->getOperand(1) ||
13310 Cmp.getOperand(1) != N->getOperand(2))
13311 return SDValue();
13312 Shft = N->getOperand(1);
13313 Clamp = isConstOrConstSplat(N->getOperand(2));
13314 } else
13315 return SDValue();
13316
13317 if (!Clamp)
13318 return SDValue();
13319
13320 MVT ScalarType;
13321 int ShftAmt = 0;
13322 switch (Clamp->getSExtValue()) {
13323 case (1 << 7) - 1:
13324 ScalarType = MVT::i8;
13325 ShftAmt = 7;
13326 break;
13327 case (1 << 15) - 1:
13328 ScalarType = MVT::i16;
13329 ShftAmt = 15;
13330 break;
13331 case (1ULL << 31) - 1:
13332 ScalarType = MVT::i32;
13333 ShftAmt = 31;
13334 break;
13335 default:
13336 return SDValue();
13337 }
13338
13339 if (Shft.getOpcode() != ISD::SRA)
13340 return SDValue();
13341 ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1));
13342 if (!N1 || N1->getSExtValue() != ShftAmt)
13343 return SDValue();
13344
13345 SDValue Mul = Shft.getOperand(0);
13346 if (Mul.getOpcode() != ISD::MUL)
13347 return SDValue();
13348
13349 SDValue Ext0 = Mul.getOperand(0);
13350 SDValue Ext1 = Mul.getOperand(1);
13351 if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||
13352 Ext1.getOpcode() != ISD::SIGN_EXTEND)
13353 return SDValue();
13354 EVT VecVT = Ext0.getOperand(0).getValueType();
13355 if (!VecVT.isPow2VectorType() || VecVT.getVectorNumElements() == 1)
13356 return SDValue();
13357 if (Ext1.getOperand(0).getValueType() != VecVT ||
13358 VecVT.getScalarType() != ScalarType ||
13359 VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)
13360 return SDValue();
13361
13362 SDLoc DL(Mul);
13363 unsigned LegalLanes = 128 / (ShftAmt + 1);
13364 EVT LegalVecVT = MVT::getVectorVT(ScalarType, LegalLanes);
13365 // For types smaller than legal vectors, extend to be legal and only use the
13366 // needed lanes.
13367 if (VecVT.getSizeInBits() < 128) {
13368 EVT ExtVecVT =
13369 MVT::getVectorVT(MVT::getIntegerVT(128 / VecVT.getVectorNumElements()),
13370 VecVT.getVectorNumElements());
13371 SDValue Inp0 =
13372 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext0.getOperand(0));
13373 SDValue Inp1 =
13374 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext1.getOperand(0));
13375 Inp0 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp0);
13376 Inp1 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp1);
13377 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13378 SDValue Trunc = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, ExtVecVT, VQDMULH);
13379 Trunc = DAG.getNode(ISD::TRUNCATE, DL, VecVT, Trunc);
13380 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Trunc);
13381 }
13382
13383 // For larger types, split into legal sized chunks.
13384 assert(VecVT.getSizeInBits() % 128 == 0 && "Expected a power2 type");
13385 unsigned NumParts = VecVT.getSizeInBits() / 128;
13386 SmallVector<SDValue> Parts;
13387 for (unsigned I = 0; I < NumParts; ++I) {
13388 SDValue Inp0 =
13389 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext0.getOperand(0),
13390 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13391 SDValue Inp1 =
13392 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext1.getOperand(0),
13393 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13394 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13395 Parts.push_back(VQDMULH);
13396 }
13397 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT,
13398 DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Parts));
13399}
13400
13401 static SDValue PerformVSELECTCombine(SDNode *N,
13402 TargetLowering::DAGCombinerInfo &DCI,
13403 const ARMSubtarget *Subtarget) {
13404 if (!Subtarget->hasMVEIntegerOps())
13405 return SDValue();
13406
13407 if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))
13408 return V;
13409
13410 // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
13411 //
13412 // We need to re-implement this optimization here as the implementation in the
13413 // Target-Independent DAGCombiner does not handle the kind of constant we make
13414 // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
13415 // good reason, allowing truncation there would break other targets).
13416 //
13417 // Currently, this is only done for MVE, as it's the only target that benefits
13418 // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
13419 if (N->getOperand(0).getOpcode() != ISD::XOR)
13420 return SDValue();
13421 SDValue XOR = N->getOperand(0);
13422
13423 // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.
13424 // It is important to check with truncation allowed as the BUILD_VECTORs we
13425 // generate in those situations will truncate their operands.
13426 ConstantSDNode *Const =
13427 isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
13428 /*AllowTruncation*/ true);
13429 if (!Const || !Const->isOne())
13430 return SDValue();
13431
13432 // Rewrite into vselect(cond, rhs, lhs).
13433 SDValue Cond = XOR->getOperand(0);
13434 SDValue LHS = N->getOperand(1);
13435 SDValue RHS = N->getOperand(2);
13436 EVT Type = N->getValueType(0);
13437 return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
13438}
13439
13440// Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n
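// For example, setcc(build_vector(0,1,2,3), splat(n), ult) produces a mask
// with the first n lanes true, which is exactly what vctp32(n) returns.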
13441 static SDValue PerformVSetCCToVCTPCombine(SDNode *N,
13442 TargetLowering::DAGCombinerInfo &DCI,
13443 const ARMSubtarget *Subtarget) {
13444 SDValue Op0 = N->getOperand(0);
13445 SDValue Op1 = N->getOperand(1);
13446 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13447 EVT VT = N->getValueType(0);
13448
13449 if (!Subtarget->hasMVEIntegerOps() ||
13450 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
13451 return SDValue();
13452
13453 if (CC == ISD::SETUGE) {
13454 std::swap(Op0, Op1);
13455 CC = ISD::SETULT;
13456 }
13457
13458 if (CC != ISD::SETULT || VT.getScalarSizeInBits() != 1 ||
13459 Op0.getOpcode() != ISD::BUILD_VECTOR)
13460 return SDValue();
13461
13462 // Check first operand is BuildVector of 0,1,2,...
13463 for (unsigned I = 0; I < VT.getVectorNumElements(); I++) {
13464 if (!Op0.getOperand(I).isUndef() &&
13465 !(isa<ConstantSDNode>(Op0.getOperand(I)) &&
13466 Op0.getConstantOperandVal(I) == I))
13467 return SDValue();
13468 }
13469
13470 // The second is a Splat of Op1S
13471 SDValue Op1S = DCI.DAG.getSplatValue(Op1);
13472 if (!Op1S)
13473 return SDValue();
13474
13475 unsigned Opc;
13476 switch (VT.getVectorNumElements()) {
13477 case 2:
13478 Opc = Intrinsic::arm_mve_vctp64;
13479 break;
13480 case 4:
13481 Opc = Intrinsic::arm_mve_vctp32;
13482 break;
13483 case 8:
13484 Opc = Intrinsic::arm_mve_vctp16;
13485 break;
13486 case 16:
13487 Opc = Intrinsic::arm_mve_vctp8;
13488 break;
13489 default:
13490 return SDValue();
13491 }
13492
13493 SDLoc DL(N);
13494 return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13495 DCI.DAG.getConstant(Opc, DL, MVT::i32),
13496 DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
13497}
13498
13499 static SDValue PerformABSCombine(SDNode *N,
13500 TargetLowering::DAGCombinerInfo &DCI,
13501 const ARMSubtarget *Subtarget) {
13502 SelectionDAG &DAG = DCI.DAG;
13503 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13504
13505 if (TLI.isOperationLegal(N->getOpcode(), N->getValueType(0)))
13506 return SDValue();
13507
13508 return TLI.expandABS(N, DAG);
13509}
13510
13511/// PerformADDECombine - Target-specific dag combine transform from
13512/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
13513/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
13514 static SDValue PerformADDECombine(SDNode *N,
13515 TargetLowering::DAGCombinerInfo &DCI,
13516 const ARMSubtarget *Subtarget) {
13517 // Only ARM and Thumb2 support UMLAL/SMLAL.
13518 if (Subtarget->isThumb1Only())
13519 return PerformAddeSubeCombine(N, DCI, Subtarget);
13520
13521 // Only perform the checks after legalize when the pattern is available.
13522 if (DCI.isBeforeLegalize()) return SDValue();
13523
13524 return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
13525}
13526
13527/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
13528/// operands N0 and N1. This is a helper for PerformADDCombine that is
13529/// called with the default operands, and if that fails, with commuted
13530/// operands.
13531 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
13532 TargetLowering::DAGCombinerInfo &DCI,
13533 const ARMSubtarget *Subtarget){
13534 // Attempt to create vpadd for this add.
13535 if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
13536 return Result;
13537
13538 // Attempt to create vpaddl for this add.
13539 if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
13540 return Result;
13541 if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
13542 Subtarget))
13543 return Result;
13544
13545 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
13546 if (N0.getNode()->hasOneUse())
13547 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
13548 return Result;
13549 return SDValue();
13550}
13551
13552 static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) {
13553 EVT VT = N->getValueType(0);
13554 SDValue N0 = N->getOperand(0);
13555 SDValue N1 = N->getOperand(1);
13556 SDLoc dl(N);
13557
13558 auto IsVecReduce = [](SDValue Op) {
13559 switch (Op.getOpcode()) {
13560 case ISD::VECREDUCE_ADD:
13561 case ARMISD::VADDVs:
13562 case ARMISD::VADDVu:
13563 case ARMISD::VMLAVs:
13564 case ARMISD::VMLAVu:
13565 return true;
13566 }
13567 return false;
13568 };
13569
13570 auto DistrubuteAddAddVecReduce = [&](SDValue N0, SDValue N1) {
13571 // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->
13572 // add(add(X, vecreduce(Y)), vecreduce(Z))
13573 // to make better use of vaddva style instructions.
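// e.g. X + (vaddv(Y) + vaddv(Z)) becomes (X + vaddv(Y)) + vaddv(Z), which
// can then use the accumulating VADDVA form for both reductions.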
13574 if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&
13575 IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&
13576 !isa<ConstantSDNode>(N0) && N1->hasOneUse()) {
13577 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));
13578 return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));
13579 }
13580 // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
13581 // add(add(add(A, C), reduce(B)), reduce(D))
13582 if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
13583 N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) {
13584 unsigned N0RedOp = 0;
13585 if (!IsVecReduce(N0.getOperand(N0RedOp))) {
13586 N0RedOp = 1;
13587 if (!IsVecReduce(N0.getOperand(N0RedOp)))
13588 return SDValue();
13589 }
13590
13591 unsigned N1RedOp = 0;
13592 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13593 N1RedOp = 1;
13594 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13595 return SDValue();
13596
13597 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),
13598 N1.getOperand(1 - N1RedOp));
13599 SDValue Add1 =
13600 DAG.getNode(ISD::ADD, dl, VT, Add0, N0.getOperand(N0RedOp));
13601 return DAG.getNode(ISD::ADD, dl, VT, Add1, N1.getOperand(N1RedOp));
13602 }
13603 return SDValue();
13604 };
13605 if (SDValue R = DistrubuteAddAddVecReduce(N0, N1))
13606 return R;
13607 if (SDValue R = DistrubuteAddAddVecReduce(N1, N0))
13608 return R;
13609
13610 // Distribute add(vecreduce(load(Y)), vecreduce(load(Z)))
13611 // Or add(add(X, vecreduce(load(Y))), vecreduce(load(Z)))
13612 // by ascending load offsets. This can help cores prefetch if the order of
13613 // loads is more predictable.
13614 auto DistrubuteVecReduceLoad = [&](SDValue N0, SDValue N1, bool IsForward) {
13615 // Check if two reductions are known to load data where one is before/after
13616 // another. Return negative if N0 loads data before N1, positive if N1 is
13617 // before N0, and 0 if nothing is known.
13618 auto IsKnownOrderedLoad = [&](SDValue N0, SDValue N1) {
13619 // Look through to the first operand of a MUL, for the VMLA case.
13620 // Currently only looks at the first operand, in the hope they are equal.
13621 if (N0.getOpcode() == ISD::MUL)
13622 N0 = N0.getOperand(0);
13623 if (N1.getOpcode() == ISD::MUL)
13624 N1 = N1.getOperand(0);
13625
13626 // Return true if the two operands are loads to the same object and the
13627 // offset of the first is known to be less than the offset of the second.
13628 LoadSDNode *Load0 = dyn_cast<LoadSDNode>(N0);
13629 LoadSDNode *Load1 = dyn_cast<LoadSDNode>(N1);
13630 if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||
13631 !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||
13632 Load1->isIndexed())
13633 return 0;
13634
13635 auto BaseLocDecomp0 = BaseIndexOffset::match(Load0, DAG);
13636 auto BaseLocDecomp1 = BaseIndexOffset::match(Load1, DAG);
13637
13638 if (!BaseLocDecomp0.getBase() ||
13639 BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
13640 !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
13641 return 0;
13642 if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
13643 return -1;
13644 if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
13645 return 1;
13646 return 0;
13647 };
13648
13649 SDValue X;
13650 if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) {
13651 if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) {
13652 int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0),
13653 N0.getOperand(1).getOperand(0));
13654 if (IsBefore < 0) {
13655 X = N0.getOperand(0);
13656 N0 = N0.getOperand(1);
13657 } else if (IsBefore > 0) {
13658 X = N0.getOperand(1);
13659 N0 = N0.getOperand(0);
13660 } else
13661 return SDValue();
13662 } else if (IsVecReduce(N0.getOperand(0))) {
13663 X = N0.getOperand(1);
13664 N0 = N0.getOperand(0);
13665 } else if (IsVecReduce(N0.getOperand(1))) {
13666 X = N0.getOperand(0);
13667 N0 = N0.getOperand(1);
13668 } else
13669 return SDValue();
13670 } else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
13671 IsKnownOrderedLoad(N0.getOperand(0), N1.getOperand(0)) < 0) {
13672 // Note this is backward to how you would expect. We create
13673 // add(reduce(load + 16), reduce(load + 0)) so that the
13674 // add(reduce(load+16), X) is combined into VADDVA(X, load+16)), leaving
13675 // the X as VADDV(load + 0)
13676 return DAG.getNode(ISD::ADD, dl, VT, N1, N0);
13677 } else
13678 return SDValue();
13679
13680 if (!IsVecReduce(N0) || !IsVecReduce(N1))
13681 return SDValue();
13682
13683 if (IsKnownOrderedLoad(N1.getOperand(0), N0.getOperand(0)) >= 0)
13684 return SDValue();
13685
13686 // Switch from add(add(X, N0), N1) to add(add(X, N1), N0)
13687 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, X, N1);
13688 return DAG.getNode(ISD::ADD, dl, VT, Add0, N0);
13689 };
13690 if (SDValue R = DistrubuteVecReduceLoad(N0, N1, true))
13691 return R;
13692 if (SDValue R = DistrubuteVecReduceLoad(N1, N0, false))
13693 return R;
13694 return SDValue();
13695}
13696
13697 static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,
13698 const ARMSubtarget *Subtarget) {
13699 if (!Subtarget->hasMVEIntegerOps())
13700 return SDValue();
13701
13702 if (SDValue R = TryDistrubutionADDVecReduce(N, DAG))
13703 return R;
13704
13705 EVT VT = N->getValueType(0);
13706 SDValue N0 = N->getOperand(0);
13707 SDValue N1 = N->getOperand(1);
13708 SDLoc dl(N);
13709
13710 if (VT != MVT::i64)
13711 return SDValue();
13712
13713 // We are looking for a i64 add of a VADDLVx. Due to these being i64's, this
13714 // will look like:
13715 // t1: i32,i32 = ARMISD::VADDLVs x
13716 // t2: i64 = build_pair t1, t1:1
13717 // t3: i64 = add t2, y
13718 // Otherwise we try to push the add up above VADDLVAx, to potentially allow
13719 // the add to be simplified separately.
13720 // We also need to check for sext / zext and commutative adds.
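// For example, add(y, build_pair(VADDLVs(x), VADDLVs(x):1)) can become
// VADDLVAs(ylo, yhi, x), with y split into two i32 halves as the accumulator.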
13721 auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA,
13722 SDValue NB) {
13723 if (NB->getOpcode() != ISD::BUILD_PAIR)
13724 return SDValue();
13725 SDValue VecRed = NB->getOperand(0);
13726 if ((VecRed->getOpcode() != Opcode && VecRed->getOpcode() != OpcodeA) ||
13727 VecRed.getResNo() != 0 ||
13728 NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
13729 return SDValue();
13730
13731 if (VecRed->getOpcode() == OpcodeA) {
13732 // add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)
13733 SDValue Inp = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
13734 VecRed.getOperand(0), VecRed.getOperand(1));
13735 NA = DAG.getNode(ISD::ADD, dl, MVT::i64, Inp, NA);
13736 }
13737
13738 SmallVector<SDValue, 4> Ops(2);
13739 std::tie(Ops[0], Ops[1]) = DAG.SplitScalar(NA, dl, MVT::i32, MVT::i32);
13740
13741 unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;
13742 for (unsigned I = S, E = VecRed.getNumOperands(); I < E; I++)
13743 Ops.push_back(VecRed->getOperand(I));
13744 SDValue Red =
13745 DAG.getNode(OpcodeA, dl, DAG.getVTList({MVT::i32, MVT::i32}), Ops);
13746 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Red,
13747 SDValue(Red.getNode(), 1));
13748 };
13749
13750 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))
13751 return M;
13752 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))
13753 return M;
13754 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))
13755 return M;
13756 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))
13757 return M;
13758 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))
13759 return M;
13760 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))
13761 return M;
13762 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))
13763 return M;
13764 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))
13765 return M;
13766 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))
13767 return M;
13768 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))
13769 return M;
13770 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))
13771 return M;
13772 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
13773 return M;
13774 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
13775 return M;
13776 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
13777 return M;
13778 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
13779 return M;
13780 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
13781 return M;
13782 return SDValue();
13783}
13784
13785bool
13786 ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
13787 CombineLevel Level) const {
13788 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
13789 N->getOpcode() == ISD::SRL) &&
13790 "Expected shift op");
13791
13792 if (Level == BeforeLegalizeTypes)
13793 return true;
13794
13795 if (N->getOpcode() != ISD::SHL)
13796 return true;
13797
13798 if (Subtarget->isThumb1Only()) {
13799 // Avoid making expensive immediates by commuting shifts. (This logic
13800 // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
13801 // for free.)
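// For example, (shl (add x, 255), 2) is best left alone here: commuting it
// to (add (shl x, 2), 1020) would need an immediate Thumb1 cannot encode,
// while 255 fits in an 8-bit immediate.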
13802 if (N->getOpcode() != ISD::SHL)
13803 return true;
13804 SDValue N1 = N->getOperand(0);
13805 if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
13806 N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
13807 return true;
13808 if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
13809 if (Const->getAPIntValue().ult(256))
13810 return false;
13811 if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
13812 Const->getAPIntValue().sgt(-256))
13813 return false;
13814 }
13815 return true;
13816 }
13817
13818 // Turn off commute-with-shift transform after legalization, so it doesn't
13819 // conflict with PerformSHLSimplify. (We could try to detect when
13820 // PerformSHLSimplify would trigger more precisely, but it isn't
13821 // really necessary.)
13822 return false;
13823}
13824
13825 bool ARMTargetLowering::isDesirableToCommuteXorWithShift(
13826 const SDNode *N) const {
13827 assert(N->getOpcode() == ISD::XOR &&
13828 (N->getOperand(0).getOpcode() == ISD::SHL ||
13829 N->getOperand(0).getOpcode() == ISD::SRL) &&
13830 "Expected XOR(SHIFT) pattern");
13831
13832 // Only commute if the entire NOT mask is a hidden shifted mask.
13833 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
13834 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
13835 if (XorC && ShiftC) {
13836 unsigned MaskIdx, MaskLen;
13837 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13838 unsigned ShiftAmt = ShiftC->getZExtValue();
13839 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
13840 if (N->getOperand(0).getOpcode() == ISD::SHL)
13841 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
13842 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
13843 }
13844 }
13845
13846 return false;
13847}
13848
13849 bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
13850 const SDNode *N, CombineLevel Level) const {
13851 assert(((N->getOpcode() == ISD::SHL &&
13852 N->getOperand(0).getOpcode() == ISD::SRL) ||
13853 (N->getOpcode() == ISD::SRL &&
13854 N->getOperand(0).getOpcode() == ISD::SHL)) &&
13855 "Expected shift-shift mask");
13856
13857 if (!Subtarget->isThumb1Only())
13858 return true;
13859
13860 if (Level == BeforeLegalizeTypes)
13861 return true;
13862
13863 return false;
13864}
13865
13866 bool ARMTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
13867 EVT VT) const {
13868 return Subtarget->hasMVEIntegerOps() && isTypeLegal(VT);
13869}
13870
13871 bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
13872 if (!Subtarget->hasNEON()) {
13873 if (Subtarget->isThumb1Only())
13874 return VT.getScalarSizeInBits() <= 32;
13875 return true;
13876 }
13877 return VT.isScalarInteger();
13878}
13879
13880 bool ARMTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
13881 EVT VT) const {
13882 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
13883 return false;
13884
13885 switch (FPVT.getSimpleVT().SimpleTy) {
13886 case MVT::f16:
13887 return Subtarget->hasVFP2Base();
13888 case MVT::f32:
13889 return Subtarget->hasVFP2Base();
13890 case MVT::f64:
13891 return Subtarget->hasFP64();
13892 case MVT::v4f32:
13893 case MVT::v8f16:
13894 return Subtarget->hasMVEFloatOps();
13895 default:
13896 return false;
13897 }
13898}
13899
13900 static SDValue PerformSHLSimplify(SDNode *N,
13901 TargetLowering::DAGCombinerInfo &DCI,
13902 const ARMSubtarget *ST) {
13903 // Allow the generic combiner to identify potential bswaps.
13904 if (DCI.isBeforeLegalize())
13905 return SDValue();
13906
13907 // DAG combiner will fold:
13908 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13909 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2
13910 // Other code patterns that can also be modified have the following form:
13911 // b + ((a << 1) | 510)
13912 // b + ((a << 1) & 510)
13913 // b + ((a << 1) ^ 510)
13914 // b + ((a << 1) + 510)
13915
13916 // Many instructions can perform the shift for free, but that requires both
13917 // operands to be registers. If c1 << c2 is too large, a mov immediate
13918 // instruction will be needed. So, unfold back to the original pattern if:
13919 // - c1 and c2 are small enough that they don't require mov imms, and
13920 // - the user(s) of the node can perform the shl themselves.
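// For example, given add(y, or(shl(a, 1), 510)), c1 is 510 >> 1 == 255 and
// the node is rebuilt as add(y, shl(or(a, 255), 1)), letting the add fold
// the shift into its shifted-operand form.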
13921
13922 // No shifted operands for 16-bit instructions.
13923 if (ST->isThumb() && ST->isThumb1Only())
13924 return SDValue();
13925
13926 // Check that all the users could perform the shl themselves.
13927 for (auto *U : N->uses()) {
13928 switch(U->getOpcode()) {
13929 default:
13930 return SDValue();
13931 case ISD::SUB:
13932 case ISD::ADD:
13933 case ISD::AND:
13934 case ISD::OR:
13935 case ISD::XOR:
13936 case ISD::SETCC:
13937 case ARMISD::CMP:
13938 // Check that the user isn't already using a constant because there
13939 // aren't any instructions that support an immediate operand and a
13940 // shifted operand.
13941 if (isa<ConstantSDNode>(U->getOperand(0)) ||
13942 isa<ConstantSDNode>(U->getOperand(1)))
13943 return SDValue();
13944
13945 // Check that it's not already using a shift.
13946 if (U->getOperand(0).getOpcode() == ISD::SHL ||
13947 U->getOperand(1).getOpcode() == ISD::SHL)
13948 return SDValue();
13949 break;
13950 }
13951 }
13952
13953 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
13954 N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
13955 return SDValue();
13956
13957 if (N->getOperand(0).getOpcode() != ISD::SHL)
13958 return SDValue();
13959
13960 SDValue SHL = N->getOperand(0);
13961
13962 auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
13963 auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
13964 if (!C1ShlC2 || !C2)
13965 return SDValue();
13966
13967 APInt C2Int = C2->getAPIntValue();
13968 APInt C1Int = C1ShlC2->getAPIntValue();
13969 unsigned C2Width = C2Int.getBitWidth();
13970 if (C2Int.uge(C2Width))
13971 return SDValue();
13972 uint64_t C2Value = C2Int.getZExtValue();
13973
13974 // Check that performing a lshr will not lose any information.
13975 APInt Mask = APInt::getHighBitsSet(C2Width, C2Width - C2Value);
13976 if ((C1Int & Mask) != C1Int)
13977 return SDValue();
13978
13979 // Shift the first constant.
13980 C1Int.lshrInPlace(C2Int);
13981
13982 // The immediates are encoded as an 8-bit value that can be rotated.
13983 auto LargeImm = [](const APInt &Imm) {
13984 unsigned Zeros = Imm.countl_zero() + Imm.countr_zero();
13985 return Imm.getBitWidth() - Zeros > 8;
13986 };
13987
13988 if (LargeImm(C1Int) || LargeImm(C2Int))
13989 return SDValue();
13990
13991 SelectionDAG &DAG = DCI.DAG;
13992 SDLoc dl(N);
13993 SDValue X = SHL.getOperand(0);
13994 SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
13995 DAG.getConstant(C1Int, dl, MVT::i32));
13996 // Shift left to compensate for the lshr of C1Int.
13997 SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
13998
13999 LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
14000 SHL.dump(); N->dump());
14001 LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
14002 return Res;
14003}
14004
14005
14006/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
14007///
14008 static SDValue PerformADDCombine(SDNode *N,
14009 TargetLowering::DAGCombinerInfo &DCI,
14010 const ARMSubtarget *Subtarget) {
14011 SDValue N0 = N->getOperand(0);
14012 SDValue N1 = N->getOperand(1);
14013
14014 // Only works one way, because it needs an immediate operand.
14015 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14016 return Result;
14017
14018 if (SDValue Result = PerformADDVecReduce(N, DCI.DAG, Subtarget))
14019 return Result;
14020
14021 // First try with the default operand order.
14022 if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
14023 return Result;
14024
14025 // If that didn't work, try again with the operands commuted.
14026 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
14027}
14028
14029// Combine (sub 0, (csinc X, Y, CC)) -> (csinv -X, Y, CC)
14030// providing -X is as cheap as X (currently, just a constant).
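// Since csinc(X, Y, CC) is "CC ? X : Y + 1", negating it gives
// "CC ? -X : ~Y", which is csinv(-X, Y, CC); with X a constant, -X folds away.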
14031 static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
14032 if (N->getValueType(0) != MVT::i32 || !isNullConstant(N->getOperand(0)))
14033 return SDValue();
14034 SDValue CSINC = N->getOperand(1);
14035 if (CSINC.getOpcode() != ARMISD::CSINC || !CSINC.hasOneUse())
14036 return SDValue();
14037
14038 ConstantSDNode *X = dyn_cast<ConstantSDNode>(CSINC.getOperand(0));
14039 if (!X)
14040 return SDValue();
14041
14042 return DAG.getNode(ARMISD::CSINV, SDLoc(N), MVT::i32,
14043 DAG.getNode(ISD::SUB, SDLoc(N), MVT::i32, N->getOperand(0),
14044 CSINC.getOperand(0)),
14045 CSINC.getOperand(1), CSINC.getOperand(2),
14046 CSINC.getOperand(3));
14047}
14048
14049/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
14050///
14051 static SDValue PerformSUBCombine(SDNode *N,
14052 TargetLowering::DAGCombinerInfo &DCI,
14053 const ARMSubtarget *Subtarget) {
14054 SDValue N0 = N->getOperand(0);
14055 SDValue N1 = N->getOperand(1);
14056
14057 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
14058 if (N1.getNode()->hasOneUse())
14059 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
14060 return Result;
14061
14062 if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
14063 return R;
14064
14065 if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
14066 return SDValue();
14067
14068 // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
14069 // so that we can readily pattern match more mve instructions which can use
14070 // a scalar operand.
14071 SDValue VDup = N->getOperand(1);
14072 if (VDup->getOpcode() != ARMISD::VDUP)
14073 return SDValue();
14074
14075 SDValue VMov = N->getOperand(0);
14076 if (VMov->getOpcode() == ISD::BITCAST)
14077 VMov = VMov->getOperand(0);
14078
14079 if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
14080 return SDValue();
14081
14082 SDLoc dl(N);
14083 SDValue Negate = DCI.DAG.getNode(ISD::SUB, dl, MVT::i32,
14084 DCI.DAG.getConstant(0, dl, MVT::i32),
14085 VDup->getOperand(0));
14086 return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
14087}
14088
14089/// PerformVMULCombine
14090/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
14091/// special multiplier accumulator forwarding.
14092/// vmul d3, d0, d2
14093/// vmla d3, d1, d2
14094/// is faster than
14095/// vadd d3, d0, d1
14096/// vmul d3, d3, d2
14097// However, for (A + B) * (A + B),
14098// vadd d2, d0, d1
14099// vmul d3, d0, d2
14100// vmla d3, d1, d2
14101// is slower than
14102// vadd d2, d0, d1
14103// vmul d3, d2, d2
14104 static SDValue PerformVMULCombine(SDNode *N,
14105 TargetLowering::DAGCombinerInfo &DCI,
14106 const ARMSubtarget *Subtarget) {
14107 if (!Subtarget->hasVMLxForwarding())
14108 return SDValue();
14109
14110 SelectionDAG &DAG = DCI.DAG;
14111 SDValue N0 = N->getOperand(0);
14112 SDValue N1 = N->getOperand(1);
14113 unsigned Opcode = N0.getOpcode();
14114 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14115 Opcode != ISD::FADD && Opcode != ISD::FSUB) {
14116 Opcode = N1.getOpcode();
14117 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14118 Opcode != ISD::FADD && Opcode != ISD::FSUB)
14119 return SDValue();
14120 std::swap(N0, N1);
14121 }
14122
14123 if (N0 == N1)
14124 return SDValue();
14125
14126 EVT VT = N->getValueType(0);
14127 SDLoc DL(N);
14128 SDValue N00 = N0->getOperand(0);
14129 SDValue N01 = N0->getOperand(1);
14130 return DAG.getNode(Opcode, DL, VT,
14131 DAG.getNode(ISD::MUL, DL, VT, N00, N1),
14132 DAG.getNode(ISD::MUL, DL, VT, N01, N1));
14133}
14134
14135 static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG,
14136 const ARMSubtarget *Subtarget) {
14137 EVT VT = N->getValueType(0);
14138 if (VT != MVT::v2i64)
14139 return SDValue();
14140
14141 SDValue N0 = N->getOperand(0);
14142 SDValue N1 = N->getOperand(1);
14143
14144 auto IsSignExt = [&](SDValue Op) {
14145 if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)
14146 return SDValue();
14147 EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();
14148 if (VT.getScalarSizeInBits() == 32)
14149 return Op->getOperand(0);
14150 return SDValue();
14151 };
14152 auto IsZeroExt = [&](SDValue Op) {
14153 // Zero extends are a little more awkward. At the point we are matching
14154 // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.
14155 // That might be before or after a bitcast depending on how the and is
14156 // placed. Because this has to look through bitcasts, it is currently only
14157 // supported on LE.
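// e.g. and(x, bitcast(build_vector(-1, 0, -1, 0) : v4i32)) keeps only the
// low 32 bits of each i64 lane, i.e. a zero-extend of a v2i32 value.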
14158 if (!Subtarget->isLittle())
14159 return SDValue();
14160
14161 SDValue And = Op;
14162 if (And->getOpcode() == ISD::BITCAST)
14163 And = And->getOperand(0);
14164 if (And->getOpcode() != ISD::AND)
14165 return SDValue();
14166 SDValue Mask = And->getOperand(1);
14167 if (Mask->getOpcode() == ISD::BITCAST)
14168 Mask = Mask->getOperand(0);
14169
14170 if (Mask->getOpcode() != ISD::BUILD_VECTOR ||
14171 Mask.getValueType() != MVT::v4i32)
14172 return SDValue();
14173 if (isAllOnesConstant(Mask->getOperand(0)) &&
14174 isNullConstant(Mask->getOperand(1)) &&
14175 isAllOnesConstant(Mask->getOperand(2)) &&
14176 isNullConstant(Mask->getOperand(3)))
14177 return And->getOperand(0);
14178 return SDValue();
14179 };
14180
14181 SDLoc dl(N);
14182 if (SDValue Op0 = IsSignExt(N0)) {
14183 if (SDValue Op1 = IsSignExt(N1)) {
14184 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14185 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14186 return DAG.getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);
14187 }
14188 }
14189 if (SDValue Op0 = IsZeroExt(N0)) {
14190 if (SDValue Op1 = IsZeroExt(N1)) {
14191 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14192 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14193 return DAG.getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);
14194 }
14195 }
14196
14197 return SDValue();
14198}
14199
14200 static SDValue PerformMULCombine(SDNode *N,
14201 TargetLowering::DAGCombinerInfo &DCI,
14202 const ARMSubtarget *Subtarget) {
14203 SelectionDAG &DAG = DCI.DAG;
14204
14205 EVT VT = N->getValueType(0);
14206 if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
14207 return PerformMVEVMULLCombine(N, DAG, Subtarget);
14208
14209 if (Subtarget->isThumb1Only())
14210 return SDValue();
14211
14212 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14213 return SDValue();
14214
14215 if (VT.is64BitVector() || VT.is128BitVector())
14216 return PerformVMULCombine(N, DCI, Subtarget);
14217 if (VT != MVT::i32)
14218 return SDValue();
14219
14220 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14221 if (!C)
14222 return SDValue();
14223
14224 int64_t MulAmt = C->getSExtValue();
14225 unsigned ShiftAmt = llvm::countr_zero<uint64_t>(MulAmt);
14226
14227 ShiftAmt = ShiftAmt & (32 - 1);
14228 SDValue V = N->getOperand(0);
14229 SDLoc DL(N);
14230
14231 SDValue Res;
14232 MulAmt >>= ShiftAmt;
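// e.g. for MulAmt == 20 (5 << 2): handle the odd factor 5 as
// (add (shl x, 2), x) below, then apply the final shl by ShiftAmt (2).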
14233
14234 if (MulAmt >= 0) {
14235 if (llvm::has_single_bit<uint32_t>(MulAmt - 1)) {
14236 // (mul x, 2^N + 1) => (add (shl x, N), x)
14237 Res = DAG.getNode(ISD::ADD, DL, VT,
14238 V,
14239 DAG.getNode(ISD::SHL, DL, VT,
14240 V,
14241 DAG.getConstant(Log2_32(MulAmt - 1), DL,
14242 MVT::i32)));
14243 } else if (llvm::has_single_bit<uint32_t>(MulAmt + 1)) {
14244 // (mul x, 2^N - 1) => (sub (shl x, N), x)
14245 Res = DAG.getNode(ISD::SUB, DL, VT,
14246 DAG.getNode(ISD::SHL, DL, VT,
14247 V,
14248 DAG.getConstant(Log2_32(MulAmt + 1), DL,
14249 MVT::i32)),
14250 V);
14251 } else
14252 return SDValue();
14253 } else {
14254 uint64_t MulAmtAbs = -MulAmt;
14255 if (llvm::has_single_bit<uint32_t>(MulAmtAbs + 1)) {
14256 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
14257 Res = DAG.getNode(ISD::SUB, DL, VT,
14258 V,
14259 DAG.getNode(ISD::SHL, DL, VT,
14260 V,
14261 DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
14262 MVT::i32)));
14263 } else if (llvm::has_single_bit<uint32_t>(MulAmtAbs - 1)) {
14264 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
14265 Res = DAG.getNode(ISD::ADD, DL, VT,
14266 V,
14267 DAG.getNode(ISD::SHL, DL, VT,
14268 V,
14269 DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
14270 MVT::i32)));
14271 Res = DAG.getNode(ISD::SUB, DL, VT,
14272 DAG.getConstant(0, DL, MVT::i32), Res);
14273 } else
14274 return SDValue();
14275 }
14276
14277 if (ShiftAmt != 0)
14278 Res = DAG.getNode(ISD::SHL, DL, VT,
14279 Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
14280
14281 // Do not add new nodes to DAG combiner worklist.
14282 DCI.CombineTo(N, Res, false);
14283 return SDValue();
14284}
14285
14286 static SDValue CombineANDShift(SDNode *N,
14287 TargetLowering::DAGCombinerInfo &DCI,
14288 const ARMSubtarget *Subtarget) {
14289 // Allow DAGCombine to pattern-match before we touch the canonical form.
14290 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14291 return SDValue();
14292
14293 if (N->getValueType(0) != MVT::i32)
14294 return SDValue();
14295
14296 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14297 if (!N1C)
14298 return SDValue();
14299
14300 uint32_t C1 = (uint32_t)N1C->getZExtValue();
14301 // Don't transform uxtb/uxth.
14302 if (C1 == 255 || C1 == 65535)
14303 return SDValue();
14304
14305 SDNode *N0 = N->getOperand(0).getNode();
14306 if (!N0->hasOneUse())
14307 return SDValue();
14308
14309 if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
14310 return SDValue();
14311
14312 bool LeftShift = N0->getOpcode() == ISD::SHL;
14313
14314 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14315 if (!N01C)
14316 return SDValue();
14317
14318 uint32_t C2 = (uint32_t)N01C->getZExtValue();
14319 if (!C2 || C2 >= 32)
14320 return SDValue();
14321
14322 // Clear irrelevant bits in the mask.
14323 if (LeftShift)
14324 C1 &= (-1U << C2);
14325 else
14326 C1 &= (-1U >> C2);
14327
14328 SelectionDAG &DAG = DCI.DAG;
14329 SDLoc DL(N);
14330
14331 // We have a pattern of the form "(and (shl x, c2) c1)" or
14332 // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
14333 // transform to a pair of shifts, to save materializing c1.
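// For example, (and (srl x, 3), 0x1F) can become (srl (shl x, 24), 27),
// which needs no materialized mask constant.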
14334
14335 // First pattern: right shift, then mask off leading bits.
14336 // FIXME: Use demanded bits?
14337 if (!LeftShift && isMask_32(C1)) {
14338 uint32_t C3 = llvm::countl_zero(C1);
14339 if (C2 < C3) {
14340 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14341 DAG.getConstant(C3 - C2, DL, MVT::i32));
14342 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14343 DAG.getConstant(C3, DL, MVT::i32));
14344 }
14345 }
14346
14347 // First pattern, reversed: left shift, then mask off trailing bits.
14348 if (LeftShift && isMask_32(~C1)) {
14349 uint32_t C3 = llvm::countr_zero(C1);
14350 if (C2 < C3) {
14351 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14352 DAG.getConstant(C3 - C2, DL, MVT::i32));
14353 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14354 DAG.getConstant(C3, DL, MVT::i32));
14355 }
14356 }
14357
14358 // Second pattern: left shift, then mask off leading bits.
14359 // FIXME: Use demanded bits?
14360 if (LeftShift && isShiftedMask_32(C1)) {
14361 uint32_t Trailing = llvm::countr_zero(C1);
14362 uint32_t C3 = llvm::countl_zero(C1);
14363 if (Trailing == C2 && C2 + C3 < 32) {
14364 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14365 DAG.getConstant(C2 + C3, DL, MVT::i32));
14366 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14367 DAG.getConstant(C3, DL, MVT::i32));
14368 }
14369 }
14370
14371 // Second pattern, reversed: right shift, then mask off trailing bits.
14372 // FIXME: Handle other patterns of known/demanded bits.
14373 if (!LeftShift && isShiftedMask_32(C1)) {
14374 uint32_t Leading = llvm::countl_zero(C1);
14375 uint32_t C3 = llvm::countr_zero(C1);
14376 if (Leading == C2 && C2 + C3 < 32) {
14377 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14378 DAG.getConstant(C2 + C3, DL, MVT::i32));
14379 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14380 DAG.getConstant(C3, DL, MVT::i32));
14381 }
14382 }
14383
14384 // FIXME: Transform "(and (shl x, c2) c1)" ->
14385 // "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
14386 // c1.
14387 return SDValue();
14388}
14389
14390 static SDValue PerformANDCombine(SDNode *N,
14391 TargetLowering::DAGCombinerInfo &DCI,
14392 const ARMSubtarget *Subtarget) {
14393 // Attempt to use immediate-form VBIC
14394 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14395 SDLoc dl(N);
14396 EVT VT = N->getValueType(0);
14397 SelectionDAG &DAG = DCI.DAG;
14398
14399 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v2i1 ||
14400 VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)
14401 return SDValue();
14402
14403 APInt SplatBits, SplatUndef;
14404 unsigned SplatBitSize;
14405 bool HasAnyUndefs;
14406 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14407 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14408 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14409 SplatBitSize == 64) {
14410 EVT VbicVT;
14411 SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
14412 SplatUndef.getZExtValue(), SplatBitSize,
14413 DAG, dl, VbicVT, VT, OtherModImm);
14414 if (Val.getNode()) {
14415 SDValue Input =
14416 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
14417 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
14418 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
14419 }
14420 }
14421 }
14422
14423 if (!Subtarget->isThumb1Only()) {
14424 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
14425 if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
14426 return Result;
14427
14428 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14429 return Result;
14430 }
14431
14432 if (Subtarget->isThumb1Only())
14433 if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
14434 return Result;
14435
14436 return SDValue();
14437}
14438
14439// Try combining OR nodes to SMULWB, SMULWT.
14440 static SDValue PerformORCombineToSMULWBT(SDNode *OR,
14441 TargetLowering::DAGCombinerInfo &DCI,
14442 const ARMSubtarget *Subtarget) {
14443 if (!Subtarget->hasV6Ops() ||
14444 (Subtarget->isThumb() &&
14445 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
14446 return SDValue();
14447
14448 SDValue SRL = OR->getOperand(0);
14449 SDValue SHL = OR->getOperand(1);
14450
14451 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
14452 SRL = OR->getOperand(1);
14453 SHL = OR->getOperand(0);
14454 }
14455 if (!isSRL16(SRL) || !isSHL16(SHL))
14456 return SDValue();
14457
14458 // The first operands to the shifts need to be the two results from the
14459 // same smul_lohi node.
14460 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
14461 SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
14462 return SDValue();
14463
14464 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
14465 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
14466 SHL.getOperand(0) != SDValue(SMULLOHI, 1))
14467 return SDValue();
14468
14469 // Now we have:
14470 // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
14471 // For SMUL[B|T], smul_lohi will take a 32-bit and a 16-bit argument.
14472 // For SMULWB the 16-bit value will be sign-extended in some way.
14473 // For SMULWT only the SRA is required.
14474 // Check both sides of SMUL_LOHI
14475 SDValue OpS16 = SMULLOHI->getOperand(0);
14476 SDValue OpS32 = SMULLOHI->getOperand(1);
14477
14478 SelectionDAG &DAG = DCI.DAG;
14479 if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
14480 OpS16 = OpS32;
14481 OpS32 = SMULLOHI->getOperand(0);
14482 }
14483
14484 SDLoc dl(OR);
14485 unsigned Opcode = 0;
14486 if (isS16(OpS16, DAG))
14487 Opcode = ARMISD::SMULWB;
14488 else if (isSRA16(OpS16)) {
14489 Opcode = ARMISD::SMULWT;
14490 OpS16 = OpS16->getOperand(0);
14491 }
14492 else
14493 return SDValue();
14494
14495 SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
14496 DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
14497 return SDValue(OR, 0);
14498}
14499
14500 static SDValue PerformORCombineToBFI(SDNode *N,
14501 TargetLowering::DAGCombinerInfo &DCI,
14502 const ARMSubtarget *Subtarget) {
14503 // BFI is only available on V6T2+
14504 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
14505 return SDValue();
14506
14507 EVT VT = N->getValueType(0);
14508 SDValue N0 = N->getOperand(0);
14509 SDValue N1 = N->getOperand(1);
14510 SelectionDAG &DAG = DCI.DAG;
14511 SDLoc DL(N);
14512 // 1) or (and A, mask), val => ARMbfi A, val, mask
14513 // iff (val & mask) == val
14514 //
14515 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14516 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
14517 // && mask == ~mask2
14518 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
14519 // && ~mask == mask2
14520 // (i.e., copy a bitfield value into another bitfield of the same width)
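// For example, case (1): or(and(A, 0xFFFF00FF), 0x3400) becomes
// (ARMbfi A, 0x34, 0xFFFF00FF), inserting the value 0x34 into bits [15:8].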
14521
14522 if (VT != MVT::i32)
14523 return SDValue();
14524
14525 SDValue N00 = N0.getOperand(0);
14526
14527 // The value and the mask need to be constants so we can verify this is
14528 // actually a bitfield set. If the mask is 0xffff, we can do better
14529 // via a movt instruction, so don't use BFI in that case.
14530 SDValue MaskOp = N0.getOperand(1);
14531 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
14532 if (!MaskC)
14533 return SDValue();
14534 unsigned Mask = MaskC->getZExtValue();
14535 if (Mask == 0xffff)
14536 return SDValue();
14537 SDValue Res;
14538 // Case (1): or (and A, mask), val => ARMbfi A, val, mask
14539 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
14540 if (N1C) {
14541 unsigned Val = N1C->getZExtValue();
14542 if ((Val & ~Mask) != Val)
14543 return SDValue();
14544
14545 if (ARM::isBitFieldInvertedMask(Mask)) {
14546 Val >>= llvm::countr_zero(~Mask);
14547
14548 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
14549 DAG.getConstant(Val, DL, MVT::i32),
14550 DAG.getConstant(Mask, DL, MVT::i32));
14551
14552 DCI.CombineTo(N, Res, false);
14553 // Return value from the original node to inform the combiner that N is
14554 // now dead.
14555 return SDValue(N, 0);
14556 }
14557 } else if (N1.getOpcode() == ISD::AND) {
14558 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14559 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14560 if (!N11C)
14561 return SDValue();
14562 unsigned Mask2 = N11C->getZExtValue();
14563
14564 // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
14565 // as is to match.
14566 if (ARM::isBitFieldInvertedMask(Mask) &&
14567 (Mask == ~Mask2)) {
14568 // The pack halfword instruction works better for masks that fit it,
14569 // so use that when it's available.
14570 if (Subtarget->hasDSP() &&
14571 (Mask == 0xffff || Mask == 0xffff0000))
14572 return SDValue();
14573 // 2a
14574 unsigned amt = llvm::countr_zero(Mask2);
14575 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
14576 DAG.getConstant(amt, DL, MVT::i32));
14577 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
14578 DAG.getConstant(Mask, DL, MVT::i32));
14579 DCI.CombineTo(N, Res, false);
14580 // Return value from the original node to inform the combiner that N is
14581 // now dead.
14582 return SDValue(N, 0);
14583 } else if (ARM::isBitFieldInvertedMask(~Mask) &&
14584 (~Mask == Mask2)) {
14585 // The pack halfword instruction works better for masks that fit it,
14586 // so use that when it's available.
14587 if (Subtarget->hasDSP() &&
14588 (Mask2 == 0xffff || Mask2 == 0xffff0000))
14589 return SDValue();
14590 // 2b
14591 unsigned lsb = llvm::countr_zero(Mask);
14592 Res = DAG.getNode(ISD::SRL, DL, VT, N00,
14593 DAG.getConstant(lsb, DL, MVT::i32));
14594 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
14595 DAG.getConstant(Mask2, DL, MVT::i32));
14596 DCI.CombineTo(N, Res, false);
14597 // Return value from the original node to inform the combiner that N is
14598 // now dead.
14599 return SDValue(N, 0);
14600 }
14601 }
14602
14603 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
14604 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
14605 ARM::isBitFieldInvertedMask(~Mask)) {
14606 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
14607 // where lsb(mask) == #shamt and masked bits of B are known zero.
14608 SDValue ShAmt = N00.getOperand(1);
14609 unsigned ShAmtC = ShAmt->getAsZExtVal();
14610 unsigned LSB = llvm::countr_zero(Mask);
14611 if (ShAmtC != LSB)
14612 return SDValue();
14613
14614 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
14615 DAG.getConstant(~Mask, DL, MVT::i32));
14616
14617 DCI.CombineTo(N, Res, false);
14618 // Return value from the original node to inform the combiner that N is
14619 // now dead.
14620 return SDValue(N, 0);
14621 }
14622
14623 return SDValue();
14624}
14625
14626static bool isValidMVECond(unsigned CC, bool IsFloat) {
14627 switch (CC) {
14628 case ARMCC::EQ:
14629 case ARMCC::NE:
14630 case ARMCC::LE:
14631 case ARMCC::GT:
14632 case ARMCC::GE:
14633 case ARMCC::LT:
14634 return true;
14635 case ARMCC::HS:
14636 case ARMCC::HI:
14637 return !IsFloat;
14638 default:
14639 return false;
14640 };
14641}
14642
14643 static ARMCC::CondCodes getVCMPCondCode(SDValue N) {
14644 if (N->getOpcode() == ARMISD::VCMP)
14645 return (ARMCC::CondCodes)N->getConstantOperandVal(2);
14646 else if (N->getOpcode() == ARMISD::VCMPZ)
14647 return (ARMCC::CondCodes)N->getConstantOperandVal(1);
14648 else
14649 llvm_unreachable("Not a VCMP/VCMPZ!");
14650}
14651
14652 static bool CanInvertMVEVCMP(SDValue N) {
14653 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N));
14654 return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
14656
14657 static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG,
14658 const ARMSubtarget *Subtarget) {
14659 // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
14660 // together with predicates
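// e.g. or(P, Q) on an MVE predicate becomes vpnot(and(vpnot(P), vpnot(Q)));
// the inner NOTs can then fold into the VCMP condition codes (see the XOR
// combine below), leaving a single AND plus one VPNOT.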
14661 EVT VT = N->getValueType(0);
14662 SDLoc DL(N);
14663 SDValue N0 = N->getOperand(0);
14664 SDValue N1 = N->getOperand(1);
14665
14666 auto IsFreelyInvertable = [&](SDValue V) {
14667 if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
14668 return CanInvertMVEVCMP(V);
14669 return false;
14670 };
14671
14672 // At least one operand must be freely invertible.
14673 if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
14674 return SDValue();
14675
14676 SDValue NewN0 = DAG.getLogicalNOT(DL, N0, VT);
14677 SDValue NewN1 = DAG.getLogicalNOT(DL, N1, VT);
14678 SDValue And = DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1);
14679 return DAG.getLogicalNOT(DL, And, VT);
14680}
14681
14682/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
14683 static SDValue PerformORCombine(SDNode *N,
14684 TargetLowering::DAGCombinerInfo &DCI,
14685 const ARMSubtarget *Subtarget) {
14686 // Attempt to use immediate-form VORR
14687 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14688 SDLoc dl(N);
14689 EVT VT = N->getValueType(0);
14690 SelectionDAG &DAG = DCI.DAG;
14691
14692 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14693 return SDValue();
14694
14695 if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
14696 VT == MVT::v8i1 || VT == MVT::v16i1))
14697 return PerformORCombine_i1(N, DAG, Subtarget);
14698
14699 APInt SplatBits, SplatUndef;
14700 unsigned SplatBitSize;
14701 bool HasAnyUndefs;
14702 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14703 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14704 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14705 SplatBitSize == 64) {
14706 EVT VorrVT;
14707 SDValue Val =
14708 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
14709 SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
14710 if (Val.getNode()) {
14711 SDValue Input =
14712 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
14713 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
14714 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
14715 }
14716 }
14717 }
14718
14719 if (!Subtarget->isThumb1Only()) {
14720 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
14721 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14722 return Result;
14723 if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
14724 return Result;
14725 }
14726
14727 SDValue N0 = N->getOperand(0);
14728 SDValue N1 = N->getOperand(1);
14729
14730 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
14731 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
14733
14734 // The code below optimizes (or (and X, Y), Z).
14735 // The AND operand needs to have a single user to make these optimizations
14736 // profitable.
14737 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
14738 return SDValue();
14739
14740 APInt SplatUndef;
14741 unsigned SplatBitSize;
14742 bool HasAnyUndefs;
14743
14744 APInt SplatBits0, SplatBits1;
14745 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
14746 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
14747 // Ensure that the second operand of each AND is a constant splat
14748 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
14749 HasAnyUndefs) && !HasAnyUndefs) {
14750 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
14751 HasAnyUndefs) && !HasAnyUndefs) {
14752 // Ensure that the bit widths of the constants are the same and that
14753 // the splat arguments are logical inverses as per the pattern we
14754 // are trying to simplify.
14755 if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
14756 SplatBits0 == ~SplatBits1) {
14757 // Canonicalize the vector type to make instruction selection
14758 // simpler.
14759 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
14760 SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
14761 N0->getOperand(1),
14762 N0->getOperand(0),
14763 N1->getOperand(0));
14764 return DAG.getNode(ISD::BITCAST, dl, VT, Result);
14765 }
14766 }
14767 }
14768 }
14769
14770 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
14771 // reasonable.
14772 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
14773 if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
14774 return Res;
14775 }
14776
14777 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14778 return Result;
14779
14780 return SDValue();
14781}
14782
14783static SDValue PerformXORCombine(SDNode *N,
14784 TargetLowering::DAGCombinerInfo &DCI,
14785 const ARMSubtarget *Subtarget) {
14786 EVT VT = N->getValueType(0);
14787 SelectionDAG &DAG = DCI.DAG;
14788
14789 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14790 return SDValue();
14791
14792 if (!Subtarget->isThumb1Only()) {
14793 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
14794 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14795 return Result;
14796
14797 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14798 return Result;
14799 }
14800
14801 if (Subtarget->hasMVEIntegerOps()) {
14802 // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition.
14803 SDValue N0 = N->getOperand(0);
14804 SDValue N1 = N->getOperand(1);
14805 const TargetLowering *TLI = Subtarget->getTargetLowering();
14806 if (TLI->isConstTrueVal(N1) &&
14807 (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
14808 if (CanInvertMVEVCMP(N0)) {
14809 SDLoc DL(N0);
14810 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N0));
14811
14812 SmallVector<SDValue, 4> Ops;
14813 Ops.push_back(N0->getOperand(0));
14814 if (N0->getOpcode() == ARMISD::VCMP)
14815 Ops.push_back(N0->getOperand(1));
14816 Ops.push_back(DAG.getConstant(CC, DL, MVT::i32));
14817 return DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
14818 }
14819 }
14820 }
14821
14822 return SDValue();
14823}
14824
14825// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
14826// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
14827// their position in "to" (Rd).
14828static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
14829 assert(N->getOpcode() == ARMISD::BFI);
14830
14831 SDValue From = N->getOperand(1);
14832 ToMask = ~N->getConstantOperandAPInt(2);
14833 FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.popcount());
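// The BFI writes ToMask.popcount() bits, taken from the low end of "From",
// into the bit positions of "To" that are set in ToMask.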
14834
14835 // If the Base came from a SHR #C, we can deduce that it is really testing bit
14836 // #C in the base of the SHR.
14837 if (From->getOpcode() == ISD::SRL &&
14838 isa<ConstantSDNode>(From->getOperand(1))) {
14839 APInt Shift = From->getConstantOperandAPInt(1);
14840 assert(Shift.getLimitedValue() < 32 && "Shift too large!");
14841 FromMask <<= Shift.getLimitedValue(31);
14842 From = From->getOperand(0);
14843 }
14844
14845 return From;
14846}
14847
14848// If A and B contain one contiguous set of bits, does A | B == A . B?
14849//
14850 // Neither A nor B may be zero.
14851static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
14852 unsigned LastActiveBitInA = A.countr_zero();
14853 unsigned FirstActiveBitInB = B.getBitWidth() - B.countl_zero() - 1;
14854 return LastActiveBitInA - 1 == FirstActiveBitInB;
14855}
14856
14857static SDValue FindBFIToCombineWith(SDNode *N) {
14858 // We have a BFI in N. Find a BFI it can combine with, if one exists.
14859 APInt ToMask, FromMask;
14860 SDValue From = ParseBFI(N, ToMask, FromMask);
14861 SDValue To = N->getOperand(0);
14862
14863 SDValue V = To;
14864 if (V.getOpcode() != ARMISD::BFI)
14865 return SDValue();
14866
14867 APInt NewToMask, NewFromMask;
14868 SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
14869 if (NewFrom != From)
14870 return SDValue();
14871
14872 // Do the written bits conflict with any we've seen so far?
14873 if ((NewToMask & ToMask).getBoolValue())
14874 // Conflicting bits.
14875 return SDValue();
14876
14877 // Are the new bits contiguous when combined with the old bits?
14878 if (BitsProperlyConcatenate(ToMask, NewToMask) &&
14879 BitsProperlyConcatenate(FromMask, NewFromMask))
14880 return V;
14881 if (BitsProperlyConcatenate(NewToMask, ToMask) &&
14882 BitsProperlyConcatenate(NewFromMask, FromMask))
14883 return V;
14884
14885 return SDValue();
14886}
14887
14888static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
14889 SDValue N0 = N->getOperand(0);
14890 SDValue N1 = N->getOperand(1);
14891
14892 if (N1.getOpcode() == ISD::AND) {
14893 // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
14894 // the bits being cleared by the AND are not demanded by the BFI.
14895 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14896 if (!N11C)
14897 return SDValue();
14898 unsigned InvMask = N->getConstantOperandVal(2);
14899 unsigned LSB = llvm::countr_zero(~InvMask);
14900 unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
14901 assert(Width <
14902 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
14903 "undefined behavior");
14904 unsigned Mask = (1u << Width) - 1;
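// Mask covers the low Width bits of the inserted value, which are the only
// source bits the BFI actually reads.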
14905 unsigned Mask2 = N11C->getZExtValue();
14906 if ((Mask & (~Mask2)) == 0)
14907 return DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
14908 N->getOperand(0), N1.getOperand(0), N->getOperand(2));
14909 return SDValue();
14910 }
14911
14912 // Look for another BFI to combine with.
14913 if (SDValue CombineBFI = FindBFIToCombineWith(N)) {
14914 // We've found a BFI.
14915 APInt ToMask1, FromMask1;
14916 SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
14917
14918 APInt ToMask2, FromMask2;
14919 SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
14920 assert(From1 == From2);
14921 (void)From2;
14922
14923 // Create a new BFI, combining the two together.
14924 APInt NewFromMask = FromMask1 | FromMask2;
14925 APInt NewToMask = ToMask1 | ToMask2;
14926
14927 EVT VT = N->getValueType(0);
14928 SDLoc dl(N);
14929
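// If the combined source field does not start at bit 0, shift the source
// down so the merged BFI can read it from the bottom.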
14930 if (NewFromMask[0] == 0)
14931 From1 = DAG.getNode(ISD::SRL, dl, VT, From1,
14932 DAG.getConstant(NewFromMask.countr_zero(), dl, VT));
14933 return DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1,
14934 DAG.getConstant(~NewToMask, dl, VT));
14935 }
14936
14937 // Reassociate BFI(BFI (A, B, M1), C, M2) to BFI(BFI (A, C, M2), B, M1) so
14938 // that lower bit insertions are performed first, provided that M1 and M2
14939 // do not overlap. This can allow multiple BFI instructions to be combined
14940 // together by the other folds above.
14941 if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
14942 APInt ToMask1 = ~N->getConstantOperandAPInt(2);
14943 APInt ToMask2 = ~N0.getConstantOperandAPInt(2);
14944
14945 if (!N0.hasOneUse() || (ToMask1 & ToMask2) != 0 ||
14946 ToMask1.countl_zero() < ToMask2.countl_zero())
14947 return SDValue();
14948
14949 EVT VT = N->getValueType(0);
14950 SDLoc dl(N);
14951 SDValue BFI1 = DAG.getNode(ARMISD::BFI, dl, VT, N0.getOperand(0),
14952 N->getOperand(1), N->getOperand(2));
14953 return DAG.getNode(ARMISD::BFI, dl, VT, BFI1, N0.getOperand(1),
14954 N0.getOperand(2));
14955 }
14956
14957 return SDValue();
14958}
14959
14960// Check that N is CMPZ(CSINC(0, 0, CC, X)),
14961// or CMPZ(CMOV(1, 0, CC, $cpsr, X))
14962// return X if valid.
14963static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
14964 if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
14965 return SDValue();
14966 SDValue CSInc = Cmp->getOperand(0);
14967
14968 // Ignore any `And 1` nodes that may not yet have been removed. We are
14969 // looking for a value that produces 1/0, so these have no effect on the
14970 // code.
14971 while (CSInc.getOpcode() == ISD::AND &&
14972 isa<ConstantSDNode>(CSInc.getOperand(1)) &&
14973 CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
14974 CSInc = CSInc.getOperand(0);
14975
14976 if (CSInc.getOpcode() == ARMISD::CSINC &&
14977 isNullConstant(CSInc.getOperand(0)) &&
14978 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
14979 CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
14980 return CSInc.getOperand(3);
14981 }
14982 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
14983 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
14984 CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
14985 return CSInc.getOperand(4);
14986 }
14987 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
14988 isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
14989 CC = ARMCC::getOppositeCondition(
14990 (ARMCC::CondCodes)CSInc.getConstantOperandVal(2));
14991 return CSInc.getOperand(4);
14992 }
14993 return SDValue();
14994}
14995
14996static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) {
14997 // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in
14998 // t92: glue = ARMISD::CMPZ t74, 0
14999 // t93: i32 = ARMISD::CSINC 0, 0, 1, t92
15000 // t96: glue = ARMISD::CMPZ t93, 0
15001 // t114: i32 = ARMISD::CSINV 0, 0, 0, t96
15002 ARMCC::CondCodes Cond;
15003 if (SDValue C = IsCMPZCSINC(N, Cond))
15004 if (Cond == ARMCC::EQ)
15005 return C;
15006 return SDValue();
15007}
15008
15009static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG) {
15010 // Fold away an unnecessary CMPZ/CSINC
15011 // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->
15012 // if C1==EQ -> CSXYZ A, B, C2, D
15013 // if C1==NE -> CSXYZ A, B, NOT(C2), D
15014 ARMCC::CondCodes Cond;
15015 if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
15016 if (N->getConstantOperandVal(2) == ARMCC::EQ)
15017 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15018 N->getOperand(1),
15019 DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
15020 if (N->getConstantOperandVal(2) == ARMCC::NE)
15021 return DAG.getNode(
15022 N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15023 N->getOperand(1),
15024 DAG.getConstant(ARMCC::getOppositeCondition(Cond), SDLoc(N), MVT::i32), C);
15025 }
15026 return SDValue();
15027}
15028
15029/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
15030/// ARMISD::VMOVRRD.
15031static SDValue PerformVMOVRRDCombine(SDNode *N,
15032 TargetLowering::DAGCombinerInfo &DCI,
15033 const ARMSubtarget *Subtarget) {
15034 // vmovrrd(vmovdrr x, y) -> x,y
15035 SDValue InDouble = N->getOperand(0);
15036 if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
15037 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
15038
15039 // vmovrrd(load f64) -> (load i32), (load i32)
15040 SDNode *InNode = InDouble.getNode();
15041 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
15042 InNode->getValueType(0) == MVT::f64 &&
15043 InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
15044 !cast<LoadSDNode>(InNode)->isVolatile()) {
15045 // TODO: Should this be done for non-FrameIndex operands?
15046 LoadSDNode *LD = cast<LoadSDNode>(InNode);
15047
15048 SelectionDAG &DAG = DCI.DAG;
15049 SDLoc DL(LD);
15050 SDValue BasePtr = LD->getBasePtr();
15051 SDValue NewLD1 =
15052 DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
15053 LD->getAlign(), LD->getMemOperand()->getFlags());
15054
15055 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
15056 DAG.getConstant(4, DL, MVT::i32));
15057
15058 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
15059 LD->getPointerInfo().getWithOffset(4),
15060 commonAlignment(LD->getAlign(), 4),
15061 LD->getMemOperand()->getFlags());
15062
15063 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
15064 if (DCI.DAG.getDataLayout().isBigEndian())
15065 std::swap (NewLD1, NewLD2);
15066 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
15067 return Result;
15068 }
15069
15070 // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d
15071 // VMOVRRD(extract(insert_vector(insert_vector(.., a, l1), b, l2))) -> a,b
15072 if (InDouble.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15073 isa<ConstantSDNode>(InDouble.getOperand(1))) {
15074 SDValue BV = InDouble.getOperand(0);
15075 // Look up through any nop bitcasts and vector_reg_casts. bitcasts may
15076 // change lane order under big endian.
15077 bool BVSwap = BV.getOpcode() == ISD::BITCAST;
15078 while (
15079 (BV.getOpcode() == ISD::BITCAST ||
15080 BV.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
15081 (BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64)) {
15082 BVSwap = BV.getOpcode() == ISD::BITCAST;
15083 BV = BV.getOperand(0);
15084 }
15085 if (BV.getValueType() != MVT::v4i32)
15086 return SDValue();
15087
15088 // Handle buildvectors, pulling out the correct lane depending on
15089 // endianness.
15090 unsigned Offset = InDouble.getConstantOperandVal(1) == 1 ? 2 : 0;
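// Each 64-bit element of the original vector maps to two i32 lanes of the
// v4i32, so element 1 starts at lane 2.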
15091 if (BV.getOpcode() == ISD::BUILD_VECTOR) {
15092 SDValue Op0 = BV.getOperand(Offset);
15093 SDValue Op1 = BV.getOperand(Offset + 1);
15094 if (!Subtarget->isLittle() && BVSwap)
15095 std::swap(Op0, Op1);
15096
15097 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15098 }
15099
15100 // A chain of insert_vectors, grabbing the correct value of the chain of
15101 // inserts.
15102 SDValue Op0, Op1;
15103 while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {
15104 if (isa<ConstantSDNode>(BV.getOperand(2))) {
15105 if (BV.getConstantOperandVal(2) == Offset)
15106 Op0 = BV.getOperand(1);
15107 if (BV.getConstantOperandVal(2) == Offset + 1)
15108 Op1 = BV.getOperand(1);
15109 }
15110 BV = BV.getOperand(0);
15111 }
15112 if (!Subtarget->isLittle() && BVSwap)
15113 std::swap(Op0, Op1);
15114 if (Op0 && Op1)
15115 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15116 }
15117
15118 return SDValue();
15119}
15120
15121/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
15122/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
15123static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
15124 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
15125 SDValue Op0 = N->getOperand(0);
15126 SDValue Op1 = N->getOperand(1);
15127 if (Op0.getOpcode() == ISD::BITCAST)
15128 Op0 = Op0.getOperand(0);
15129 if (Op1.getOpcode() == ISD::BITCAST)
15130 Op1 = Op1.getOperand(0);
15131 if (Op0.getOpcode() == ARMISD::VMOVRRD &&
15132 Op0.getNode() == Op1.getNode() &&
15133 Op0.getResNo() == 0 && Op1.getResNo() == 1)
15134 return DAG.getNode(ISD::BITCAST, SDLoc(N),
15135 N->getValueType(0), Op0.getOperand(0));
15136 return SDValue();
15137}
15138
15139static SDValue PerformVMOVhrCombine(SDNode *N,
15140 TargetLowering::DAGCombinerInfo &DCI) {
15141 SDValue Op0 = N->getOperand(0);
15142
15143 // VMOVhr (VMOVrh (X)) -> X
15144 if (Op0->getOpcode() == ARMISD::VMOVrh)
15145 return Op0->getOperand(0);
15146
15147 // FullFP16: half values are passed in S-registers, and we don't
15148 // need any of the bitcast and moves:
15149 //
15150 // t2: f32,ch1,gl1? = CopyFromReg ch, Register:f32 %0, gl?
15151 // t5: i32 = bitcast t2
15152 // t18: f16 = ARMISD::VMOVhr t5
15153 // =>
15154 // tN: f16,ch2,gl2? = CopyFromReg ch, Register::f32 %0, gl?
15155 if (Op0->getOpcode() == ISD::BITCAST) {
15156 SDValue Copy = Op0->getOperand(0);
15157 if (Copy.getValueType() == MVT::f32 &&
15158 Copy->getOpcode() == ISD::CopyFromReg) {
15159 bool HasGlue = Copy->getNumOperands() == 3;
15160 SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
15161 HasGlue ? Copy->getOperand(2) : SDValue()};
15162 EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};
15163 SDValue NewCopy =
15164 DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(Copy),
15165 DCI.DAG.getVTList(ArrayRef(OutTys, HasGlue ? 3 : 2)),
15166 ArrayRef(Ops, HasGlue ? 3 : 2));
15167
15168 // Update Users, Chains, and Potential Glue.
15169 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewCopy.getValue(0));
15170 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(1), NewCopy.getValue(1));
15171 if (HasGlue)
15172 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(2),
15173 NewCopy.getValue(2));
15174
15175 return NewCopy;
15176 }
15177 }
15178
15179 // fold (VMOVhr (load x)) -> (load (f16*)x)
15180 if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
15181 if (LN0->hasOneUse() && LN0->isUnindexed() &&
15182 LN0->getMemoryVT() == MVT::i16) {
15183 SDValue Load =
15184 DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
15185 LN0->getBasePtr(), LN0->getMemOperand());
15186 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15187 DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
15188 return Load;
15189 }
15190 }
15191
15192 // Only the bottom 16 bits of the source register are used.
15193 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15194 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15195 if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI))
15196 return SDValue(N, 0);
15197
15198 return SDValue();
15199}
15200
15201static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG) {
15202 SDValue N0 = N->getOperand(0);
15203 EVT VT = N->getValueType(0);
15204
15205 // fold (VMOVrh (fpconst x)) -> const x
15206 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) {
15207 APFloat V = C->getValueAPF();
15208 return DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);
15209 }
15210
15211 // fold (VMOVrh (load x)) -> (zextload (i16*)x)
15212 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
15213 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15214
15215 SDValue Load =
15216 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),
15217 LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());
15218 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15219 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15220 return Load;
15221 }
15222
15223 // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)
15224 if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15225 isa<ConstantSDNode>(N0->getOperand(1)))
15226 return DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),
15227 N0->getOperand(1));
15228
15229 return SDValue();
15230}
15231
15232/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
15233/// are normal, non-volatile loads. If so, it is profitable to bitcast an
15234/// i64 vector to have f64 elements, since the value can then be loaded
15235/// directly into a VFP register.
15236static bool hasNormalLoadOperand(SDNode *N) {
15237 unsigned NumElts = N->getValueType(0).getVectorNumElements();
15238 for (unsigned i = 0; i < NumElts; ++i) {
15239 SDNode *Elt = N->getOperand(i).getNode();
15240 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
15241 return true;
15242 }
15243 return false;
15244}
15245
15246/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
15247/// ISD::BUILD_VECTOR.
15248static SDValue PerformBUILD_VECTORCombine(SDNode *N,
15249 TargetLowering::DAGCombinerInfo &DCI,
15250 const ARMSubtarget *Subtarget) {
15251 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
15252 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
15253 // into a pair of GPRs, which is fine when the value is used as a scalar,
15254 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
15255 SelectionDAG &DAG = DCI.DAG;
15256 if (N->getNumOperands() == 2)
15257 if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
15258 return RV;
15259
15260 // Load i64 elements as f64 values so that type legalization does not split
15261 // them up into i32 values.
15262 EVT VT = N->getValueType(0);
15263 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
15264 return SDValue();
15265 SDLoc dl(N);
15266 SmallVector<SDValue, 8> Ops;
15267 unsigned NumElts = VT.getVectorNumElements();
15268 for (unsigned i = 0; i < NumElts; ++i) {
15269 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
15270 Ops.push_back(V);
15271 // Make the DAGCombiner fold the bitcast.
15272 DCI.AddToWorklist(V.getNode());
15273 }
15274 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
15275 SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
15276 return DAG.getNode(ISD::BITCAST, dl, VT, BV);
15277}
15278
15279/// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
15280static SDValue
15281PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15282 // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
15283 // At that time, we may have inserted bitcasts from integer to float.
15284 // If these bitcasts have survived DAGCombine, change the lowering of this
15285 // BUILD_VECTOR into something more vector friendly, i.e., something that does
15286 // not force the use of floating point types.
15287
15288 // Make sure we can change the type of the vector.
15289 // This is possible iff:
15290 // 1. The vector is only used in a bitcast to an integer type. I.e.,
15291 // 1.1. Vector is used only once.
15292 // 1.2. Use is a bit convert to an integer type.
15293 // 2. The size of its operands are 32-bits (64-bits are not legal).
15294 EVT VT = N->getValueType(0);
15295 EVT EltVT = VT.getVectorElementType();
15296
15297 // Check 1.1. and 2.
15298 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
15299 return SDValue();
15300
15301 // By construction, the input type must be float.
15302 assert(EltVT == MVT::f32 && "Unexpected type!");
15303
15304 // Check 1.2.
15305 SDNode *Use = *N->use_begin();
15306 if (Use->getOpcode() != ISD::BITCAST ||
15307 Use->getValueType(0).isFloatingPoint())
15308 return SDValue();
15309
15310 // Check profitability.
15311 // Model is, if more than half of the relevant operands are bitcast from
15312 // i32, turn the build_vector into a sequence of insert_vector_elt.
15313 // Relevant operands are everything that is not statically
15314 // (i.e., at compile time) bitcasted.
15315 unsigned NumOfBitCastedElts = 0;
15316 unsigned NumElts = VT.getVectorNumElements();
15317 unsigned NumOfRelevantElts = NumElts;
15318 for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
15319 SDValue Elt = N->getOperand(Idx);
15320 if (Elt->getOpcode() == ISD::BITCAST) {
15321 // Assume only bit cast to i32 will go away.
15322 if (Elt->getOperand(0).getValueType() == MVT::i32)
15323 ++NumOfBitCastedElts;
15324 } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
15325 // Constants are statically casted, thus do not count them as
15326 // relevant operands.
15327 --NumOfRelevantElts;
15328 }
15329
15330 // Check if more than half of the elements require a non-free bitcast.
15331 if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
15332 return SDValue();
15333
15334 SelectionDAG &DAG = DCI.DAG;
15335 // Create the new vector type.
15336 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
15337 // Check if the type is legal.
15338 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15339 if (!TLI.isTypeLegal(VecVT))
15340 return SDValue();
15341
15342 // Combine:
15343 // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
15344 // => BITCAST INSERT_VECTOR_ELT
15345 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
15346 // (BITCAST EN), N.
15347 SDValue Vec = DAG.getUNDEF(VecVT);
15348 SDLoc dl(N);
15349 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
15350 SDValue V = N->getOperand(Idx);
15351 if (V.isUndef())
15352 continue;
15353 if (V.getOpcode() == ISD::BITCAST &&
15354 V->getOperand(0).getValueType() == MVT::i32)
15355 // Fold obvious case.
15356 V = V.getOperand(0);
15357 else {
15358 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
15359 // Make the DAGCombiner fold the bitcasts.
15360 DCI.AddToWorklist(V.getNode());
15361 }
15362 SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
15363 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
15364 }
15365 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
15366 // Make the DAGCombiner fold the bitcasts.
15367 DCI.AddToWorklist(Vec.getNode());
15368 return Vec;
15369}
15370
15371static SDValue
15372PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15373 EVT VT = N->getValueType(0);
15374 SDValue Op = N->getOperand(0);
15375 SDLoc dl(N);
15376
15377 // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)
15378 if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
15379 // If the valuetypes are the same, we can remove the cast entirely.
15380 if (Op->getOperand(0).getValueType() == VT)
15381 return Op->getOperand(0);
15382 return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15383 }
15384
15385 // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
15386 // more VPNOT which might get folded as else predicates.
15387 if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {
15388 SDValue X =
15389 DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15390 SDValue C = DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
15391 DCI.DAG.getConstant(65535, dl, MVT::i32));
15392 return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);
15393 }
15394
15395 // Only the bottom 16 bits of the source register are used.
15396 if (Op.getValueType() == MVT::i32) {
15397 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15398 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15399 if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
15400 return SDValue(N, 0);
15401 }
15402 return SDValue();
15403}
15404
15405static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG,
15406 const ARMSubtarget *ST) {
15407 EVT VT = N->getValueType(0);
15408 SDValue Op = N->getOperand(0);
15409 SDLoc dl(N);
15410
15411 // Under Little endian, a VECTOR_REG_CAST is equivalent to a BITCAST
15412 if (ST->isLittle())
15413 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
15414
15415 // VECTOR_REG_CAST undef -> undef
15416 if (Op.isUndef())
15417 return DAG.getUNDEF(VT);
15418
15419 // VECTOR_REG_CAST(VECTOR_REG_CAST(x)) == VECTOR_REG_CAST(x)
15420 if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
15421 // If the valuetypes are the same, we can remove the cast entirely.
15422 if (Op->getOperand(0).getValueType() == VT)
15423 return Op->getOperand(0);
15424 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
15425 }
15426
15427 return SDValue();
15428}
15429
15430static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG,
15431 const ARMSubtarget *Subtarget) {
15432 if (!Subtarget->hasMVEIntegerOps())
15433 return SDValue();
15434
15435 EVT VT = N->getValueType(0);
15436 SDValue Op0 = N->getOperand(0);
15437 SDValue Op1 = N->getOperand(1);
15438 ARMCC::CondCodes Cond = (ARMCC::CondCodes)N->getConstantOperandVal(2);
15439 SDLoc dl(N);
15440
15441 // vcmp X, 0, cc -> vcmpz X, cc
15442 if (isZeroVector(Op1))
15443 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, N->getOperand(2));
15444
15445 unsigned SwappedCond = getSwappedCondition(Cond);
15446 if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
15447 // vcmp 0, X, cc -> vcmpz X, reversed(cc)
15448 if (isZeroVector(Op0))
15449 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
15450 DAG.getConstant(SwappedCond, dl, MVT::i32));
15451 // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
15452 if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
15453 return DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
15454 DAG.getConstant(SwappedCond, dl, MVT::i32));
15455 }
15456
15457 return SDValue();
15458}
15459
15460/// PerformInsertEltCombine - Target-specific dag combine xforms for
15461/// ISD::INSERT_VECTOR_ELT.
15462static SDValue PerformInsertEltCombine(SDNode *N,
15463 TargetLowering::DAGCombinerInfo &DCI) {
15464 // Bitcast an i64 load inserted into a vector to f64.
15465 // Otherwise, the i64 value will be legalized to a pair of i32 values.
15466 EVT VT = N->getValueType(0);
15467 SDNode *Elt = N->getOperand(1).getNode();
15468 if (VT.getVectorElementType() != MVT::i64 ||
15469 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
15470 return SDValue();
15471
15472 SelectionDAG &DAG = DCI.DAG;
15473 SDLoc dl(N);
15474 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
15475 VT.getVectorNumElements());
15476 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
15477 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
15478 // Make the DAGCombiner fold the bitcasts.
15479 DCI.AddToWorklist(Vec.getNode());
15480 DCI.AddToWorklist(V.getNode());
15481 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
15482 Vec, V, N->getOperand(2));
15483 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
15484}
15485
15486// Convert a pair of extracts from the same base vector to a VMOVRRD. Either
15487// directly or bitcast to an integer if the original is a float vector.
15488// extract(x, n); extract(x, n+1) -> VMOVRRD(extract v2f64 x, n/2)
15489// bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD(extract x, n/2)
15490static SDValue
15491PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15492 EVT VT = N->getValueType(0);
15493 SDLoc dl(N);
15494
15495 if (!DCI.isAfterLegalizeDAG() || VT != MVT::i32 ||
15496 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(MVT::f64))
15497 return SDValue();
15498
15499 SDValue Ext = SDValue(N, 0);
15500 if (Ext.getOpcode() == ISD::BITCAST &&
15501 Ext.getOperand(0).getValueType() == MVT::f32)
15502 Ext = Ext.getOperand(0);
15503 if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15504 !isa<ConstantSDNode>(Ext.getOperand(1)) ||
15505 Ext.getConstantOperandVal(1) % 2 != 0)
15506 return SDValue();
15507 if (Ext->use_size() == 1 &&
15508 (Ext->use_begin()->getOpcode() == ISD::SINT_TO_FP ||
15509 Ext->use_begin()->getOpcode() == ISD::UINT_TO_FP))
15510 return SDValue();
15511
15512 SDValue Op0 = Ext.getOperand(0);
15513 EVT VecVT = Op0.getValueType();
15514 unsigned ResNo = Op0.getResNo();
15515 unsigned Lane = Ext.getConstantOperandVal(1);
15516 if (VecVT.getVectorNumElements() != 4)
15517 return SDValue();
15518
15519 // Find another extract, of Lane + 1
15520 auto OtherIt = find_if(Op0->uses(), [&](SDNode *V) {
15521 return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15522 isa<ConstantSDNode>(V->getOperand(1)) &&
15523 V->getConstantOperandVal(1) == Lane + 1 &&
15524 V->getOperand(0).getResNo() == ResNo;
15525 });
15526 if (OtherIt == Op0->uses().end())
15527 return SDValue();
15528
15529 // For float extracts, we need to be converting to a i32 for both vector
15530 // lanes.
15531 SDValue OtherExt(*OtherIt, 0);
15532 if (OtherExt.getValueType() != MVT::i32) {
15533 if (OtherExt->use_size() != 1 ||
15534 OtherExt->use_begin()->getOpcode() != ISD::BITCAST ||
15535 OtherExt->use_begin()->getValueType(0) != MVT::i32)
15536 return SDValue();
15537 OtherExt = SDValue(*OtherExt->use_begin(), 0);
15538 }
15539
15540 // Convert the type to a f64 and extract with a VMOVRRD.
15541 SDValue F64 = DCI.DAG.getNode(
15542 ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
15543 DCI.DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v2f64, Op0),
15544 DCI.DAG.getConstant(Ext.getConstantOperandVal(1) / 2, dl, MVT::i32));
15545 SDValue VMOVRRD =
15546 DCI.DAG.getNode(ARMISD::VMOVRRD, dl, {MVT::i32, MVT::i32}, F64);
15547
15548 DCI.CombineTo(OtherExt.getNode(), SDValue(VMOVRRD.getNode(), 1));
15549 return VMOVRRD;
15550}
15551
15552static SDValue PerformExtractEltCombine(SDNode *N,
15553 TargetLowering::DAGCombinerInfo &DCI,
15554 const ARMSubtarget *ST) {
15555 SDValue Op0 = N->getOperand(0);
15556 EVT VT = N->getValueType(0);
15557 SDLoc dl(N);
15558
15559 // extract (vdup x) -> x
15560 if (Op0->getOpcode() == ARMISD::VDUP) {
15561 SDValue X = Op0->getOperand(0);
15562 if (VT == MVT::f16 && X.getValueType() == MVT::i32)
15563 return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
15564 if (VT == MVT::i32 && X.getValueType() == MVT::f16)
15565 return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
15566 if (VT == MVT::f32 && X.getValueType() == MVT::i32)
15567 return DCI.DAG.getNode(ISD::BITCAST, dl, VT, X);
15568
15569 while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
15570 X = X->getOperand(0);
15571 if (X.getValueType() == VT)
15572 return X;
15573 }
15574
15575 // extract ARM_BUILD_VECTOR -> x
15576 if (Op0->getOpcode() == ARMISD::BUILD_VECTOR &&
15577 isa<ConstantSDNode>(N->getOperand(1)) &&
15578 N->getConstantOperandVal(1) < Op0.getNumOperands()) {
15579 return Op0.getOperand(N->getConstantOperandVal(1));
15580 }
15581
15582 // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
15583 if (Op0.getValueType() == MVT::v4i32 &&
15584 isa<ConstantSDNode>(N->getOperand(1)) &&
15585 Op0.getOpcode() == ISD::BITCAST &&
15586 Op0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
15587 Op0.getOperand(0).getValueType() == MVT::v2f64) {
15588 SDValue BV = Op0.getOperand(0);
15589 unsigned Offset = N->getConstantOperandVal(1);
15590 SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);
15591 if (MOV.getOpcode() == ARMISD::VMOVDRR)
15592 return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
15593 }
15594
15595 // extract x, n; extract x, n+1 -> VMOVRRD x
15596 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
15597 return R;
15598
15599 // extract (MVETrunc(x)) -> extract x
15600 if (Op0->getOpcode() == ARMISD::MVETRUNC) {
15601 unsigned Idx = N->getConstantOperandVal(1);
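// MVETRUNC concatenates its truncated operands, so element Idx lives in
// operand Idx / N at sub-index Idx % N, where N is each operand's element count.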
15602 unsigned Vec =
15603 Idx / Op0->getOperand(0).getValueType().getVectorNumElements();
15604 unsigned SubIdx =
15605 Idx % Op0->getOperand(0).getValueType().getVectorNumElements();
15606 return DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Op0.getOperand(Vec),
15607 DCI.DAG.getConstant(SubIdx, dl, MVT::i32));
15608 }
15609
15610 return SDValue();
15611}
15612
15613static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG) {
15614 SDValue Op = N->getOperand(0);
15615 EVT VT = N->getValueType(0);
15616
15617 // sext_inreg(VGETLANEu) -> VGETLANEs
15618 if (Op.getOpcode() == ARMISD::VGETLANEu &&
15619 cast<VTSDNode>(N->getOperand(1))->getVT() ==
15620 Op.getOperand(0).getValueType().getScalarType())
15621 return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),
15622 Op.getOperand(1));
15623
15624 return SDValue();
15625}
15626
15627static SDValue
15628PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15629 SDValue Vec = N->getOperand(0);
15630 SDValue SubVec = N->getOperand(1);
15631 uint64_t IdxVal = N->getConstantOperandVal(2);
15632 EVT VecVT = Vec.getValueType();
15633 EVT SubVT = SubVec.getValueType();
15634
15635 // Only do this for legal fixed vector types.
15636 if (!VecVT.isFixedLengthVector() ||
15637 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
15638 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
15639 return SDValue();
15640
15641 // Ignore widening patterns.
15642 if (IdxVal == 0 && Vec.isUndef())
15643 return SDValue();
15644
15645 // Subvector must be half the width and an "aligned" insertion.
15646 unsigned NumSubElts = SubVT.getVectorNumElements();
15647 if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
15648 (IdxVal != 0 && IdxVal != NumSubElts))
15649 return SDValue();
15650
15651 // Fold insert_subvector -> concat_vectors
15652 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
15653 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
15654 SDLoc DL(N);
15655 SDValue Lo, Hi;
15656 if (IdxVal == 0) {
15657 Lo = SubVec;
15658 Hi = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15659 DCI.DAG.getVectorIdxConstant(NumSubElts, DL));
15660 } else {
15661 Lo = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15662 DCI.DAG.getVectorIdxConstant(0, DL));
15663 Hi = SubVec;
15664 }
15665 return DCI.DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
15666}
15667
15668// shuffle(MVETrunc(x, y)) -> VMOVN(x, y)
15669static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N,
15670 SelectionDAG &DAG) {
15671 SDValue Trunc = N->getOperand(0);
15672 EVT VT = Trunc.getValueType();
15673 if (Trunc.getOpcode() != ARMISD::MVETRUNC || !N->getOperand(1).isUndef())
15674 return SDValue();
15675
15676 SDLoc DL(Trunc);
15677 if (isVMOVNTruncMask(N->getMask(), VT, false))
15678 return DAG.getNode(
15679 ARMISD::VMOVN, DL, VT,
15680 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15681 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15682 DAG.getConstant(1, DL, MVT::i32));
15683 else if (isVMOVNTruncMask(N->getMask(), VT, true))
15684 return DAG.getNode(
15685 ARMISD::VMOVN, DL, VT,
15686 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15687 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15688 DAG.getConstant(1, DL, MVT::i32));
15689 return SDValue();
15690}
15691
15692/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
15693/// ISD::VECTOR_SHUFFLE.
15694static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
15695 if (SDValue R = PerformShuffleVMOVNCombine(cast<ShuffleVectorSDNode>(N), DAG))
15696 return R;
15697
15698 // The LLVM shufflevector instruction does not require the shuffle mask
15699 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
15700 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
15701 // operands do not match the mask length, they are extended by concatenating
15702 // them with undef vectors. That is probably the right thing for other
15703 // targets, but for NEON it is better to concatenate two double-register
15704 // size vector operands into a single quad-register size vector. Do that
15705 // transformation here:
15706 // shuffle(concat(v1, undef), concat(v2, undef)) ->
15707 // shuffle(concat(v1, v2), undef)
15708 SDValue Op0 = N->getOperand(0);
15709 SDValue Op1 = N->getOperand(1);
15710 if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
15711 Op1.getOpcode() != ISD::CONCAT_VECTORS ||
15712 Op0.getNumOperands() != 2 ||
15713 Op1.getNumOperands() != 2)
15714 return SDValue();
15715 SDValue Concat0Op1 = Op0.getOperand(1);
15716 SDValue Concat1Op1 = Op1.getOperand(1);
15717 if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
15718 return SDValue();
15719 // Skip the transformation if any of the types are illegal.
15720 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15721 EVT VT = N->getValueType(0);
15722 if (!TLI.isTypeLegal(VT) ||
15723 !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
15724 !TLI.isTypeLegal(Concat1Op1.getValueType()))
15725 return SDValue();
15726
15727 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
15728 Op0.getOperand(0), Op1.getOperand(0));
15729 // Translate the shuffle mask.
15730 SmallVector<int, 16> NewMask;
15731 unsigned NumElts = VT.getVectorNumElements();
15732 unsigned HalfElts = NumElts/2;
15733 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
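// Elements taken from v1 keep their index, elements taken from v2 are remapped
// into the upper half of the new concat, and anything that referenced the
// undef halves becomes -1 (undef).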
15734 for (unsigned n = 0; n < NumElts; ++n) {
15735 int MaskElt = SVN->getMaskElt(n);
15736 int NewElt = -1;
15737 if (MaskElt < (int)HalfElts)
15738 NewElt = MaskElt;
15739 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
15740 NewElt = HalfElts + MaskElt - NumElts;
15741 NewMask.push_back(NewElt);
15742 }
15743 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
15744 DAG.getUNDEF(VT), NewMask);
15745}
15746
15747/// Load/store instruction that can be merged with a base address
15748/// update
15749struct BaseUpdateTarget {
15750 SDNode *N;
15751 bool isIntrinsic;
15752 bool isStore;
15753 unsigned AddrOpIdx;
15754};
15755
15756struct BaseUpdateUser {
15757 /// Instruction that updates a pointer
15758 SDNode *N;
15759 /// Pointer increment operand
15760 SDValue Inc;
15761 /// Pointer increment value if it is a constant, or 0 otherwise
15762 unsigned ConstInc;
15763};
15764
15765static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
15766 struct BaseUpdateUser &User,
15767 bool SimpleConstIncOnly,
15768 TargetLowering::DAGCombinerInfo &DCI) {
15769 SelectionDAG &DAG = DCI.DAG;
15770 SDNode *N = Target.N;
15771 MemSDNode *MemN = cast<MemSDNode>(N);
15772 SDLoc dl(N);
15773
15774 // Find the new opcode for the updating load/store.
15775 bool isLoadOp = true;
15776 bool isLaneOp = false;
15777 // Workaround for vst1x and vld1x intrinsics which do not have alignment
15778 // as an operand.
15779 bool hasAlignment = true;
15780 unsigned NewOpc = 0;
15781 unsigned NumVecs = 0;
15782 if (Target.isIntrinsic) {
15783 unsigned IntNo = N->getConstantOperandVal(1);
15784 switch (IntNo) {
15785 default:
15786 llvm_unreachable("unexpected intrinsic for Neon base update");
15787 case Intrinsic::arm_neon_vld1:
15788 NewOpc = ARMISD::VLD1_UPD;
15789 NumVecs = 1;
15790 break;
15791 case Intrinsic::arm_neon_vld2:
15792 NewOpc = ARMISD::VLD2_UPD;
15793 NumVecs = 2;
15794 break;
15795 case Intrinsic::arm_neon_vld3:
15796 NewOpc = ARMISD::VLD3_UPD;
15797 NumVecs = 3;
15798 break;
15799 case Intrinsic::arm_neon_vld4:
15800 NewOpc = ARMISD::VLD4_UPD;
15801 NumVecs = 4;
15802 break;
15803 case Intrinsic::arm_neon_vld1x2:
15804 NewOpc = ARMISD::VLD1x2_UPD;
15805 NumVecs = 2;
15806 hasAlignment = false;
15807 break;
15808 case Intrinsic::arm_neon_vld1x3:
15809 NewOpc = ARMISD::VLD1x3_UPD;
15810 NumVecs = 3;
15811 hasAlignment = false;
15812 break;
15813 case Intrinsic::arm_neon_vld1x4:
15814 NewOpc = ARMISD::VLD1x4_UPD;
15815 NumVecs = 4;
15816 hasAlignment = false;
15817 break;
15818 case Intrinsic::arm_neon_vld2dup:
15819 NewOpc = ARMISD::VLD2DUP_UPD;
15820 NumVecs = 2;
15821 break;
15822 case Intrinsic::arm_neon_vld3dup:
15823 NewOpc = ARMISD::VLD3DUP_UPD;
15824 NumVecs = 3;
15825 break;
15826 case Intrinsic::arm_neon_vld4dup:
15827 NewOpc = ARMISD::VLD4DUP_UPD;
15828 NumVecs = 4;
15829 break;
15830 case Intrinsic::arm_neon_vld2lane:
15831 NewOpc = ARMISD::VLD2LN_UPD;
15832 NumVecs = 2;
15833 isLaneOp = true;
15834 break;
15835 case Intrinsic::arm_neon_vld3lane:
15836 NewOpc = ARMISD::VLD3LN_UPD;
15837 NumVecs = 3;
15838 isLaneOp = true;
15839 break;
15840 case Intrinsic::arm_neon_vld4lane:
15841 NewOpc = ARMISD::VLD4LN_UPD;
15842 NumVecs = 4;
15843 isLaneOp = true;
15844 break;
15845 case Intrinsic::arm_neon_vst1:
15846 NewOpc = ARMISD::VST1_UPD;
15847 NumVecs = 1;
15848 isLoadOp = false;
15849 break;
15850 case Intrinsic::arm_neon_vst2:
15851 NewOpc = ARMISD::VST2_UPD;
15852 NumVecs = 2;
15853 isLoadOp = false;
15854 break;
15855 case Intrinsic::arm_neon_vst3:
15856 NewOpc = ARMISD::VST3_UPD;
15857 NumVecs = 3;
15858 isLoadOp = false;
15859 break;
15860 case Intrinsic::arm_neon_vst4:
15861 NewOpc = ARMISD::VST4_UPD;
15862 NumVecs = 4;
15863 isLoadOp = false;
15864 break;
15865 case Intrinsic::arm_neon_vst2lane:
15866 NewOpc = ARMISD::VST2LN_UPD;
15867 NumVecs = 2;
15868 isLoadOp = false;
15869 isLaneOp = true;
15870 break;
15871 case Intrinsic::arm_neon_vst3lane:
15872 NewOpc = ARMISD::VST3LN_UPD;
15873 NumVecs = 3;
15874 isLoadOp = false;
15875 isLaneOp = true;
15876 break;
15877 case Intrinsic::arm_neon_vst4lane:
15878 NewOpc = ARMISD::VST4LN_UPD;
15879 NumVecs = 4;
15880 isLoadOp = false;
15881 isLaneOp = true;
15882 break;
15883 case Intrinsic::arm_neon_vst1x2:
15884 NewOpc = ARMISD::VST1x2_UPD;
15885 NumVecs = 2;
15886 isLoadOp = false;
15887 hasAlignment = false;
15888 break;
15889 case Intrinsic::arm_neon_vst1x3:
15890 NewOpc = ARMISD::VST1x3_UPD;
15891 NumVecs = 3;
15892 isLoadOp = false;
15893 hasAlignment = false;
15894 break;
15895 case Intrinsic::arm_neon_vst1x4:
15896 NewOpc = ARMISD::VST1x4_UPD;
15897 NumVecs = 4;
15898 isLoadOp = false;
15899 hasAlignment = false;
15900 break;
15901 }
15902 } else {
15903 isLaneOp = true;
15904 switch (N->getOpcode()) {
15905 default:
15906 llvm_unreachable("unexpected opcode for Neon base update");
15907 case ARMISD::VLD1DUP:
15908 NewOpc = ARMISD::VLD1DUP_UPD;
15909 NumVecs = 1;
15910 break;
15911 case ARMISD::VLD2DUP:
15912 NewOpc = ARMISD::VLD2DUP_UPD;
15913 NumVecs = 2;
15914 break;
15915 case ARMISD::VLD3DUP:
15916 NewOpc = ARMISD::VLD3DUP_UPD;
15917 NumVecs = 3;
15918 break;
15919 case ARMISD::VLD4DUP:
15920 NewOpc = ARMISD::VLD4DUP_UPD;
15921 NumVecs = 4;
15922 break;
15923 case ISD::LOAD:
15924 NewOpc = ARMISD::VLD1_UPD;
15925 NumVecs = 1;
15926 isLaneOp = false;
15927 break;
15928 case ISD::STORE:
15929 NewOpc = ARMISD::VST1_UPD;
15930 NumVecs = 1;
15931 isLaneOp = false;
15932 isLoadOp = false;
15933 break;
15934 }
15935 }
15936
15937 // Find the size of memory referenced by the load/store.
15938 EVT VecTy;
15939 if (isLoadOp) {
15940 VecTy = N->getValueType(0);
15941 } else if (Target.isIntrinsic) {
15942 VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
15943 } else {
15944 assert(Target.isStore &&
15945 "Node has to be a load, a store, or an intrinsic!");
15946 VecTy = N->getOperand(1).getValueType();
15947 }
15948
15949 bool isVLDDUPOp =
15950 NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||
15951 NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;
15952
15953 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
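// Lane and dup operations access only one element of each vector, so the
// pointer advances by a single element per vector rather than a whole register.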
15954 if (isLaneOp || isVLDDUPOp)
15955 NumBytes /= VecTy.getVectorNumElements();
15956
15957 if (NumBytes >= 3 * 16 && User.ConstInc != NumBytes) {
15958 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
15959 // separate instructions that make it harder to use a non-constant update.
15960 return false;
15961 }
15962
15963 if (SimpleConstIncOnly && User.ConstInc != NumBytes)
15964 return false;
15965
15966 // OK, we found an ADD we can fold into the base update.
15967 // Now, create a _UPD node, taking care of not breaking alignment.
15968
15969 EVT AlignedVecTy = VecTy;
15970 Align Alignment = MemN->getAlign();
15971
15972 // If this is a less-than-standard-aligned load/store, change the type to
15973 // match the standard alignment.
15974 // The alignment is overlooked when selecting _UPD variants; and it's
15975 // easier to introduce bitcasts here than fix that.
15976 // There are 3 ways to get to this base-update combine:
15977 // - intrinsics: they are assumed to be properly aligned (to the standard
15978 // alignment of the memory type), so we don't need to do anything.
15979 // - ARMISD::VLDx nodes: they are only generated from the aforementioned
15980 // intrinsics, so, likewise, there's nothing to do.
15981 // - generic load/store instructions: the alignment is specified as an
15982 // explicit operand, rather than implicitly as the standard alignment
15983 // of the memory type (like the intrinsics). We need to change the
15984 // memory type to match the explicit alignment. That way, we don't
15985 // generate non-standard-aligned ARMISD::VLDx nodes.
15986 if (isa<LSBaseSDNode>(N)) {
15987 if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) {
15988 MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8);
15989 assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
15990 assert(!isLaneOp && "Unexpected generic load/store lane.");
15991 unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
15992 AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
15993 }
15994 // Don't set an explicit alignment on regular load/stores that we want
15995 // to transform to VLD/VST 1_UPD nodes.
15996 // This matches the behavior of regular load/stores, which only get an
15997 // explicit alignment if the MMO alignment is larger than the standard
15998 // alignment of the memory type.
15999 // Intrinsics, however, always get an explicit alignment, set to the
16000 // alignment of the MMO.
16001 Alignment = Align(1);
16002 }
16003
16004 // Create the new updating load/store node.
16005 // First, create an SDVTList for the new updating node's results.
16006 EVT Tys[6];
16007 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16008 unsigned n;
16009 for (n = 0; n < NumResultVecs; ++n)
16010 Tys[n] = AlignedVecTy;
16011 Tys[n++] = MVT::i32;
16012 Tys[n] = MVT::Other;
16013 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16014
16015 // Then, gather the new node's operands.
16016 SmallVector<SDValue, 8> Ops;
16017 Ops.push_back(N->getOperand(0)); // incoming chain
16018 Ops.push_back(N->getOperand(Target.AddrOpIdx));
16019 Ops.push_back(User.Inc);
16020
16021 if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
16022 // Try to match the intrinsic's signature
16023 Ops.push_back(StN->getValue());
16024 } else {
16025 // Loads (and of course intrinsics) match the intrinsics' signature,
16026 // so just add all but the alignment operand.
16027 unsigned LastOperand =
16028 hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
16029 for (unsigned i = Target.AddrOpIdx + 1; i < LastOperand; ++i)
16030 Ops.push_back(N->getOperand(i));
16031 }
16032
16033 // For all node types, the alignment operand is always the last one.
16034 Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32));
16035
16036 // If this is a non-standard-aligned STORE, the penultimate operand is the
16037 // stored value. Bitcast it to the aligned type.
16038 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
16039 SDValue &StVal = Ops[Ops.size() - 2];
16040 StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
16041 }
16042
16043 EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
16044 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
16045 MemN->getMemOperand());
16046
16047 // Update the uses.
16048 SmallVector<SDValue, 5> NewResults;
16049 for (unsigned i = 0; i < NumResultVecs; ++i)
16050 NewResults.push_back(SDValue(UpdN.getNode(), i));
16051
16052 // If this is a non-standard-aligned LOAD, the first result is the loaded
16053 // value. Bitcast it to the expected result type.
16054 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
16055 SDValue &LdVal = NewResults[0];
16056 LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
16057 }
16058
16059 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16060 DCI.CombineTo(N, NewResults);
16061 DCI.CombineTo(User.N, SDValue(UpdN.getNode(), NumResultVecs));
16062
16063 return true;
16064}
16065
16066// If (opcode ptr inc) is an ADD-like instruction, return the
16067// increment value. Otherwise return 0.
16068static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr,
16069 SDValue Inc, const SelectionDAG &DAG) {
16070 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16071 if (!CInc)
16072 return 0;
16073
16074 switch (Opcode) {
16075 case ARMISD::VLD1_UPD:
16076 case ISD::ADD:
16077 return CInc->getZExtValue();
16078 case ISD::OR: {
16079 if (DAG.haveNoCommonBitsSet(Ptr, Inc)) {
16080 // (OR ptr inc) is the same as (ADD ptr inc)
16081 return CInc->getZExtValue();
16082 }
16083 return 0;
16084 }
16085 default:
16086 return 0;
16087 }
16088}
16089
16090static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
16091 switch (N->getOpcode()) {
16092 case ISD::ADD:
16093 case ISD::OR: {
16094 if (isa<ConstantSDNode>(N->getOperand(1))) {
16095 *Ptr = N->getOperand(0);
16096 *CInc = N->getOperand(1);
16097 return true;
16098 }
16099 return false;
16100 }
16101 case ARMISD::VLD1_UPD: {
16102 if (isa<ConstantSDNode>(N->getOperand(2))) {
16103 *Ptr = N->getOperand(1);
16104 *CInc = N->getOperand(2);
16105 return true;
16106 }
16107 return false;
16108 }
16109 default:
16110 return false;
16111 }
16112}
16113
16114static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
16115 // Check that the add is independent of the load/store.
16116 // Otherwise, folding it would create a cycle. Search through Addr
16117 // as well, since the User may not be a direct user of Addr and
16118 // only share a base pointer.
16119 SmallPtrSet<const SDNode *, 32> Visited;
16120 SmallVector<const SDNode *, 16> Worklist;
16121 Worklist.push_back(N);
16122 Worklist.push_back(User);
16123 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16124 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16125 return false;
16126 return true;
16127}
16128
16129/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
16130/// NEON load/store intrinsics, and generic vector load/stores, to merge
16131/// base address updates.
16132/// For generic load/stores, the memory type is assumed to be a vector.
16133/// The caller is assumed to have checked legality.
16134static SDValue CombineBaseUpdate(SDNode *N,
16135 TargetLowering::DAGCombinerInfo &DCI) {
16136 const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
16137 N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
16138 const bool isStore = N->getOpcode() == ISD::STORE;
16139 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
16140 BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
16141
16142 SDValue Addr = N->getOperand(AddrOpIdx);
16143
16144 SmallVector<BaseUpdateUser, 8> BaseUpdates;
16145
16146 // Search for a use of the address operand that is an increment.
16147 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16148 UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
16149 SDNode *User = *UI;
16150 if (UI.getUse().getResNo() != Addr.getResNo() ||
16151 User->getNumOperands() != 2)
16152 continue;
16153
16154 SDValue Inc = User->getOperand(UI.getOperandNo() == 1 ? 0 : 1);
16155 unsigned ConstInc =
16156 getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
16157
16158 if (ConstInc || User->getOpcode() == ISD::ADD)
16159 BaseUpdates.push_back({User, Inc, ConstInc});
16160 }
16161
16162 // If the address is a constant pointer increment itself, find
16163 // another constant increment that has the same base operand
16164 SDValue Base;
16165 SDValue CInc;
16166 if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) {
16167 unsigned Offset =
16168 getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
16169 for (SDNode::use_iterator UI = Base->use_begin(), UE = Base->use_end();
16170 UI != UE; ++UI) {
16171
16172 SDNode *User = *UI;
16173 if (UI.getUse().getResNo() != Base.getResNo() || User == Addr.getNode() ||
16174 User->getNumOperands() != 2)
16175 continue;
16176
16177 SDValue UserInc = User->getOperand(UI.getOperandNo() == 0 ? 1 : 0);
16178 unsigned UserOffset =
16179 getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
16180
16181 if (!UserOffset || UserOffset <= Offset)
16182 continue;
16183
16184 unsigned NewConstInc = UserOffset - Offset;
16185 SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
16186 BaseUpdates.push_back({User, NewInc, NewConstInc});
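// The user computes Base + UserOffset; relative to Addr (= Base + Offset)
// that is a further post-increment of UserOffset - Offset.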
16187 }
16188 }
16189
16190 // Try to fold the load/store with an update that matches memory
16191 // access size. This should work well for sequential loads.
16192 //
16193 // Filter out invalid updates as well.
16194 unsigned NumValidUpd = BaseUpdates.size();
16195 for (unsigned I = 0; I < NumValidUpd;) {
16196 BaseUpdateUser &User = BaseUpdates[I];
16197 if (!isValidBaseUpdate(N, User.N)) {
16198 --NumValidUpd;
16199 std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16200 continue;
16201 }
16202
16203 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
16204 return SDValue();
16205 ++I;
16206 }
16207 BaseUpdates.resize(NumValidUpd);
16208
16209 // Try to fold with other users. Non-constant updates are considered
16210 // first, and constant updates are sorted to not break a sequence of
16211 // strided accesses (if there is any).
16212 std::stable_sort(BaseUpdates.begin(), BaseUpdates.end(),
16213 [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) {
16214 return LHS.ConstInc < RHS.ConstInc;
16215 });
16216 for (BaseUpdateUser &User : BaseUpdates) {
16217 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))
16218 return SDValue();
16219 }
16220 return SDValue();
16221}
16222
16223static SDValue PerformVLDCombine(SDNode *N,
16224 TargetLowering::DAGCombinerInfo &DCI) {
16225 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16226 return SDValue();
16227
16228 return CombineBaseUpdate(N, DCI);
16229}
16230
16231static SDValue PerformMVEVLDCombine(SDNode *N,
16232 TargetLowering::DAGCombinerInfo &DCI) {
16233 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16234 return SDValue();
16235
16236 SelectionDAG &DAG = DCI.DAG;
16237 SDValue Addr = N->getOperand(2);
16238 MemSDNode *MemN = cast<MemSDNode>(N);
16239 SDLoc dl(N);
16240
16241 // For the stores, where there are multiple intrinsics we only actually want
16242 // to post-inc the last of them.
16243 unsigned IntNo = N->getConstantOperandVal(1);
16244 if (IntNo == Intrinsic::arm_mve_vst2q && N->getConstantOperandVal(5) != 1)
16245 return SDValue();
16246 if (IntNo == Intrinsic::arm_mve_vst4q && N->getConstantOperandVal(7) != 3)
16247 return SDValue();
16248
16249 // Search for a use of the address operand that is an increment.
16250 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16251 UE = Addr.getNode()->use_end();
16252 UI != UE; ++UI) {
16253 SDNode *User = *UI;
16254 if (User->getOpcode() != ISD::ADD ||
16255 UI.getUse().getResNo() != Addr.getResNo())
16256 continue;
16257
16258 // Check that the add is independent of the load/store. Otherwise, folding
16259 // it would create a cycle. We can avoid searching through Addr as it's a
16260 // predecessor to both.
16261 SmallPtrSet<const SDNode *, 32> Visited;
16262 SmallVector<const SDNode *, 16> Worklist;
16263 Visited.insert(Addr.getNode());
16264 Worklist.push_back(N);
16265 Worklist.push_back(User);
16266 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16267 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16268 continue;
16269
16270 // Find the new opcode for the updating load/store.
16271 bool isLoadOp = true;
16272 unsigned NewOpc = 0;
16273 unsigned NumVecs = 0;
16274 switch (IntNo) {
16275 default:
16276 llvm_unreachable("unexpected intrinsic for MVE VLDn combine");
16277 case Intrinsic::arm_mve_vld2q:
16278 NewOpc = ARMISD::VLD2_UPD;
16279 NumVecs = 2;
16280 break;
16281 case Intrinsic::arm_mve_vld4q:
16282 NewOpc = ARMISD::VLD4_UPD;
16283 NumVecs = 4;
16284 break;
16285 case Intrinsic::arm_mve_vst2q:
16286 NewOpc = ARMISD::VST2_UPD;
16287 NumVecs = 2;
16288 isLoadOp = false;
16289 break;
16290 case Intrinsic::arm_mve_vst4q:
16291 NewOpc = ARMISD::VST4_UPD;
16292 NumVecs = 4;
16293 isLoadOp = false;
16294 break;
16295 }
16296
16297 // Find the size of memory referenced by the load/store.
16298 EVT VecTy;
16299 if (isLoadOp) {
16300 VecTy = N->getValueType(0);
16301 } else {
16302 VecTy = N->getOperand(3).getValueType();
16303 }
16304
16305 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
16306
16307 // If the increment is a constant, it must match the memory ref size.
16308 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
16309 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16310 if (!CInc || CInc->getZExtValue() != NumBytes)
16311 continue;
16312
16313 // Create the new updating load/store node.
16314 // First, create an SDVTList for the new updating node's results.
16315 EVT Tys[6];
16316 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16317 unsigned n;
16318 for (n = 0; n < NumResultVecs; ++n)
16319 Tys[n] = VecTy;
16320 Tys[n++] = MVT::i32;
16321 Tys[n] = MVT::Other;
16322 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16323
16324 // Then, gather the new node's operands.
16325 SmallVector<SDValue, 8> Ops;
16326 Ops.push_back(N->getOperand(0)); // incoming chain
16327 Ops.push_back(N->getOperand(2)); // ptr
16328 Ops.push_back(Inc);
16329
16330 for (unsigned i = 3; i < N->getNumOperands(); ++i)
16331 Ops.push_back(N->getOperand(i));
16332
16333 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, VecTy,
16334 MemN->getMemOperand());
16335
16336 // Update the uses.
16337 SmallVector<SDValue, 5> NewResults;
16338 for (unsigned i = 0; i < NumResultVecs; ++i)
16339 NewResults.push_back(SDValue(UpdN.getNode(), i));
16340
16341 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16342 DCI.CombineTo(N, NewResults);
16343 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
16344
16345 break;
16346 }
16347
16348 return SDValue();
16349}
16350
16351/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
16352/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
16353/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
16354/// return true.
16355 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
16356 SelectionDAG &DAG = DCI.DAG;
16357 EVT VT = N->getValueType(0);
16358 // vldN-dup instructions only support 64-bit vectors for N > 1.
16359 if (!VT.is64BitVector())
16360 return false;
16361
16362 // Check if the VDUPLANE operand is a vldN-dup intrinsic.
16363 SDNode *VLD = N->getOperand(0).getNode();
16364 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
16365 return false;
16366 unsigned NumVecs = 0;
16367 unsigned NewOpc = 0;
16368 unsigned IntNo = VLD->getConstantOperandVal(1);
16369 if (IntNo == Intrinsic::arm_neon_vld2lane) {
16370 NumVecs = 2;
16371 NewOpc = ARMISD::VLD2DUP;
16372 } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
16373 NumVecs = 3;
16374 NewOpc = ARMISD::VLD3DUP;
16375 } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
16376 NumVecs = 4;
16377 NewOpc = ARMISD::VLD4DUP;
16378 } else {
16379 return false;
16380 }
16381
16382 // First check that all the vldN-lane uses are VDUPLANEs and that the lane
16383 // numbers match the load.
16384 unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3);
16385 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16386 UI != UE; ++UI) {
16387 // Ignore uses of the chain result.
16388 if (UI.getUse().getResNo() == NumVecs)
16389 continue;
16390 SDNode *User = *UI;
16391 if (User->getOpcode() != ARMISD::VDUPLANE ||
16392 VLDLaneNo != User->getConstantOperandVal(1))
16393 return false;
16394 }
16395
16396 // Create the vldN-dup node.
16397 EVT Tys[5];
16398 unsigned n;
16399 for (n = 0; n < NumVecs; ++n)
16400 Tys[n] = VT;
16401 Tys[n] = MVT::Other;
16402 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs + 1));
16403 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
16404 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
16405 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
16406 Ops, VLDMemInt->getMemoryVT(),
16407 VLDMemInt->getMemOperand());
16408
16409 // Update the uses.
16410 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16411 UI != UE; ++UI) {
16412 unsigned ResNo = UI.getUse().getResNo();
16413 // Ignore uses of the chain result.
16414 if (ResNo == NumVecs)
16415 continue;
16416 SDNode *User = *UI;
16417 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
16418 }
16419
16420 // Now the vldN-lane intrinsic is dead except for its chain result.
16421 // Update uses of the chain.
16422 std::vector<SDValue> VLDDupResults;
16423 for (unsigned n = 0; n < NumVecs; ++n)
16424 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
16425 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
16426 DCI.CombineTo(VLD, VLDDupResults);
16427
16428 return true;
16429}
16430
16431/// PerformVDUPLANECombine - Target-specific dag combine xforms for
16432/// ARMISD::VDUPLANE.
16433 static SDValue PerformVDUPLANECombine(SDNode *N,
16434 TargetLowering::DAGCombinerInfo &DCI,
16435 const ARMSubtarget *Subtarget) {
16436 SDValue Op = N->getOperand(0);
16437 EVT VT = N->getValueType(0);
16438
16439 // On MVE, we just convert the VDUPLANE to a VDUP with an extract.
16440 if (Subtarget->hasMVEIntegerOps()) {
16441 EVT ExtractVT = VT.getVectorElementType();
16442 // We need to ensure we are creating a legal type.
16443 if (!DCI.DAG.getTargetLoweringInfo().isTypeLegal(ExtractVT))
16444 ExtractVT = MVT::i32;
16445 SDValue Extract = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ExtractVT,
16446 N->getOperand(0), N->getOperand(1));
16447 return DCI.DAG.getNode(ARMISD::VDUP, SDLoc(N), VT, Extract);
16448 }
16449
16450 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
16451 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
16452 if (CombineVLDDUP(N, DCI))
16453 return SDValue(N, 0);
16454
16455 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
16456 // redundant. Ignore bit_converts for now; element sizes are checked below.
16457 while (Op.getOpcode() == ISD::BITCAST)
16458 Op = Op.getOperand(0);
16459 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
16460 return SDValue();
16461
16462 // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
16463 unsigned EltSize = Op.getScalarValueSizeInBits();
16464 // The canonical VMOV for a zero vector uses a 32-bit element size.
16465 unsigned Imm = Op.getConstantOperandVal(0);
16466 unsigned EltBits;
16467 if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
16468 EltSize = 8;
16469 if (EltSize > VT.getScalarSizeInBits())
16470 return SDValue();
16471
16472 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
16473}
16474
16475/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
16476 static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG,
16477 const ARMSubtarget *Subtarget) {
16478 SDValue Op = N->getOperand(0);
16479 SDLoc dl(N);
16480
16481 if (Subtarget->hasMVEIntegerOps()) {
16482 // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will
16483 // need to come from a GPR.
16484 if (Op.getValueType() == MVT::f32)
16485 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16486 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op));
16487 else if (Op.getValueType() == MVT::f16)
16488 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16489 DAG.getNode(ARMISD::VMOVrh, dl, MVT::i32, Op));
16490 }
16491
16492 if (!Subtarget->hasNEON())
16493 return SDValue();
16494
16495 // Match VDUP(LOAD) -> VLD1DUP.
16496 // We match this pattern here rather than waiting for isel because the
16497 // transform is only legal for unindexed loads.
16498 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
16499 if (LD && Op.hasOneUse() && LD->isUnindexed() &&
16500 LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
16501 SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
16502 DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};
16503 SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
16504 SDValue VLDDup =
16505 DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops,
16506 LD->getMemoryVT(), LD->getMemOperand());
16507 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
16508 return VLDDup;
16509 }
16510
16511 return SDValue();
16512}
16513
16514 static SDValue PerformLOADCombine(SDNode *N,
16515 TargetLowering::DAGCombinerInfo &DCI,
16516 const ARMSubtarget *Subtarget) {
16517 EVT VT = N->getValueType(0);
16518
16519 // If this is a legal vector load, try to combine it into a VLD1_UPD.
16520 if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&
16521 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16522 return CombineBaseUpdate(N, DCI);
16523
16524 return SDValue();
16525}
16526
16527// Optimize trunc store (of multiple scalars) to shuffle and store. First,
16528// pack all of the elements in one place. Next, store to memory in fewer
16529// chunks.
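// Illustrative sketch: a truncating store of <4 x i32> to <4 x i16> is
// bitcast to <8 x i16>, the four live halves are shuffled to the front of
// the register, and the packed data is then written as two i32-sized chunks
// instead of four separate 16-bit lane stores.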
16530 static SDValue PerformTruncatingStoreCombine(StoreSDNode *St,
16531 SelectionDAG &DAG) {
16532 SDValue StVal = St->getValue();
16533 EVT VT = StVal.getValueType();
16534 if (!St->isTruncatingStore() || !VT.isVector())
16535 return SDValue();
16536 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16537 EVT StVT = St->getMemoryVT();
16538 unsigned NumElems = VT.getVectorNumElements();
16539 assert(StVT != VT && "Cannot truncate to the same type");
16540 unsigned FromEltSz = VT.getScalarSizeInBits();
16541 unsigned ToEltSz = StVT.getScalarSizeInBits();
16542
16543 // The From and To element sizes and the element count must be powers of two.
16544 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))
16545 return SDValue();
16546
16547 // We are going to use the original vector elt for storing.
16548 // Accumulated smaller vector elements must be a multiple of the store size.
16549 if (0 != (NumElems * FromEltSz) % ToEltSz)
16550 return SDValue();
16551
16552 unsigned SizeRatio = FromEltSz / ToEltSz;
16553 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
16554
16555 // Create a type on which we perform the shuffle.
16556 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
16557 NumElems * SizeRatio);
16558 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
16559
16560 SDLoc DL(St);
16561 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
16562 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
16563 for (unsigned i = 0; i < NumElems; ++i)
16564 ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
16565 : i * SizeRatio;
16566
16567 // Can't shuffle using an illegal type.
16568 if (!TLI.isTypeLegal(WideVecVT))
16569 return SDValue();
16570
16571 SDValue Shuff = DAG.getVectorShuffle(
16572 WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec);
16573 // At this point all of the data is stored at the bottom of the
16574 // register. We now need to save it to memory.
16575
16576 // Find the largest store unit
16577 MVT StoreType = MVT::i8;
16578 for (MVT Tp : MVT::integer_valuetypes()) {
16579 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
16580 StoreType = Tp;
16581 }
16582 // Didn't find a legal store type.
16583 if (!TLI.isTypeLegal(StoreType))
16584 return SDValue();
16585
16586 // Bitcast the original vector into a vector of store-size units
16587 EVT StoreVecVT =
16588 EVT::getVectorVT(*DAG.getContext(), StoreType,
16589 VT.getSizeInBits() / EVT(StoreType).getSizeInBits());
16590 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
16591 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
16592 SmallVector<SDValue, 8> Chains;
16593 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
16594 TLI.getPointerTy(DAG.getDataLayout()));
16595 SDValue BasePtr = St->getBasePtr();
16596
16597 // Perform one or more big stores into memory.
16598 unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits();
16599 for (unsigned I = 0; I < E; I++) {
16600 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType,
16601 ShuffWide, DAG.getIntPtrConstant(I, DL));
16602 SDValue Ch =
16603 DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
16604 St->getAlign(), St->getMemOperand()->getFlags());
16605 BasePtr =
16606 DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
16607 Chains.push_back(Ch);
16608 }
16609 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
16610}
16611
16612// Try taking a single vector store from an fpround (which would otherwise turn
16613// into an expensive buildvector) and splitting it into a series of narrowing
16614// stores.
16615 static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
16616 SelectionDAG &DAG) {
16617 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16618 return SDValue();
16619 SDValue Trunc = St->getValue();
16620 if (Trunc->getOpcode() != ISD::FP_ROUND)
16621 return SDValue();
16622 EVT FromVT = Trunc->getOperand(0).getValueType();
16623 EVT ToVT = Trunc.getValueType();
16624 if (!ToVT.isVector())
16625 return SDValue();
16626 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
16627 EVT ToEltVT = ToVT.getVectorElementType();
16628 EVT FromEltVT = FromVT.getVectorElementType();
16629
16630 if (FromEltVT != MVT::f32 || ToEltVT != MVT::f16)
16631 return SDValue();
16632
16633 unsigned NumElements = 4;
16634 if (FromVT.getVectorNumElements() % NumElements != 0)
16635 return SDValue();
16636
16637 // Test if the Trunc will be convertible to a VMOVN with a shuffle, and if so
16638 // use the VMOVN over splitting the store. We are looking for patterns of:
16639 // !rev: 0 N 1 N+1 2 N+2 ...
16640 // rev: N 0 N+1 1 N+2 2 ...
16641 // The shuffle may either be a single source (in which case N = NumElts/2) or
16642 // two inputs extended with concat to the same size (in which case N =
16643 // NumElts).
16644 auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {
16645 ArrayRef<int> M = SVN->getMask();
16646 unsigned NumElts = ToVT.getVectorNumElements();
16647 if (SVN->getOperand(1).isUndef())
16648 NumElts /= 2;
16649
16650 unsigned Off0 = Rev ? NumElts : 0;
16651 unsigned Off1 = Rev ? 0 : NumElts;
16652
16653 for (unsigned I = 0; I < NumElts; I += 2) {
16654 if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
16655 return false;
16656 if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
16657 return false;
16658 }
16659
16660 return true;
16661 };
16662
16663 if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))
16664 if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
16665 return SDValue();
16666
16667 LLVMContext &C = *DAG.getContext();
16668 SDLoc DL(St);
16669 // Details about the old store
16670 SDValue Ch = St->getChain();
16671 SDValue BasePtr = St->getBasePtr();
16672 Align Alignment = St->getOriginalAlign();
16673 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16674 AAMDNodes AAInfo = St->getAAInfo();
16675
16676 // We split the store into slices of NumElements. Each fp16 slice is
16677 // converted with a VCVT and then stored as a truncating integer store.
16678 EVT NewFromVT = EVT::getVectorVT(C, FromEltVT, NumElements);
16679 EVT NewToVT = EVT::getVectorVT(
16680 C, EVT::getIntegerVT(C, ToEltVT.getSizeInBits()), NumElements);
16681
16682 SmallVector<SDValue, 4> Stores;
16683 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
16684 unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
16685 SDValue NewPtr =
16686 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16687
16688 SDValue Extract =
16689 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
16690 DAG.getConstant(i * NumElements, DL, MVT::i32));
16691
16692 SDValue FPTrunc =
16693 DAG.getNode(ARMISD::VCVTN, DL, MVT::v8f16, DAG.getUNDEF(MVT::v8f16),
16694 Extract, DAG.getConstant(0, DL, MVT::i32));
16695 Extract = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v4i32, FPTrunc);
16696
16697 SDValue Store = DAG.getTruncStore(
16698 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16699 NewToVT, Alignment, MMOFlags, AAInfo);
16700 Stores.push_back(Store);
16701 }
16702 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16703}
16704
16705// Try taking a single vector store from an MVETRUNC (which would otherwise turn
16706// into an expensive buildvector) and splitting it into a series of narrowing
16707// stores.
16708 static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St,
16709 SelectionDAG &DAG) {
16710 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16711 return SDValue();
16712 SDValue Trunc = St->getValue();
16713 if (Trunc->getOpcode() != ARMISD::MVETRUNC)
16714 return SDValue();
16715 EVT FromVT = Trunc->getOperand(0).getValueType();
16716 EVT ToVT = Trunc.getValueType();
16717
16718 LLVMContext &C = *DAG.getContext();
16719 SDLoc DL(St);
16720 // Details about the old store
16721 SDValue Ch = St->getChain();
16722 SDValue BasePtr = St->getBasePtr();
16723 Align Alignment = St->getOriginalAlign();
16724 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16725 AAMDNodes AAInfo = St->getAAInfo();
16726
16727 EVT NewToVT = EVT::getVectorVT(C, ToVT.getVectorElementType(),
16728 FromVT.getVectorNumElements());
16729
16730 SmallVector<SDValue, 4> Stores;
16731 for (unsigned i = 0; i < Trunc.getNumOperands(); i++) {
16732 unsigned NewOffset =
16733 i * FromVT.getVectorNumElements() * ToVT.getScalarSizeInBits() / 8;
16734 SDValue NewPtr =
16735 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16736
16737 SDValue Extract = Trunc.getOperand(i);
16738 SDValue Store = DAG.getTruncStore(
16739 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16740 NewToVT, Alignment, MMOFlags, AAInfo);
16741 Stores.push_back(Store);
16742 }
16743 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16744}
16745
16746// Given a floating point store from an extracted vector, with an integer
16747// VGETLANE that already exists, store the existing VGETLANEu directly. This can
16748// help reduce fp register pressure, doesn't require the fp extract and allows
16749// use of more integer post-inc stores not available with vstr.
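// Illustrative sketch: if the DAG already contains an i32
// (ARMISD::VGETLANEu %vec, %lane) for the same vector and lane as the f16
// extractelement being stored, we store that i32 value with a 16-bit
// truncating integer store instead of going through an fp extract and VSTR.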
16750 static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG) {
16751 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16752 return SDValue();
16753 SDValue Extract = St->getValue();
16754 EVT VT = Extract.getValueType();
16755 // For now only uses f16. This may be useful for f32 too, but that will
16756 // be bitcast(extract), not the VGETLANEu we currently check here.
16757 if (VT != MVT::f16 || Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16758 return SDValue();
16759
16760 SDNode *GetLane =
16761 DAG.getNodeIfExists(ARMISD::VGETLANEu, DAG.getVTList(MVT::i32),
16762 {Extract.getOperand(0), Extract.getOperand(1)});
16763 if (!GetLane)
16764 return SDValue();
16765
16766 LLVMContext &C = *DAG.getContext();
16767 SDLoc DL(St);
16768 // Create a new integer store to replace the existing floating point version.
16769 SDValue Ch = St->getChain();
16770 SDValue BasePtr = St->getBasePtr();
16771 Align Alignment = St->getOriginalAlign();
16772 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16773 AAMDNodes AAInfo = St->getAAInfo();
16774 EVT NewToVT = EVT::getIntegerVT(C, VT.getSizeInBits());
16775 SDValue Store = DAG.getTruncStore(Ch, DL, SDValue(GetLane, 0), BasePtr,
16776 St->getPointerInfo(), NewToVT, Alignment,
16777 MMOFlags, AAInfo);
16778
16779 return Store;
16780}
16781
16782/// PerformSTORECombine - Target-specific dag combine xforms for
16783/// ISD::STORE.
16784 static SDValue PerformSTORECombine(SDNode *N,
16785 TargetLowering::DAGCombinerInfo &DCI,
16786 const ARMSubtarget *Subtarget) {
16787 StoreSDNode *St = cast<StoreSDNode>(N);
16788 if (St->isVolatile())
16789 return SDValue();
16790 SDValue StVal = St->getValue();
16791 EVT VT = StVal.getValueType();
16792
16793 if (Subtarget->hasNEON())
16794 if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG))
16795 return Store;
16796
16797 if (Subtarget->hasMVEFloatOps())
16798 if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG))
16799 return NewToken;
16800
16801 if (Subtarget->hasMVEIntegerOps()) {
16802 if (SDValue NewChain = PerformExtractFpToIntStores(St, DCI.DAG))
16803 return NewChain;
16804 if (SDValue NewToken =
16805 PerformSplittingMVETruncToNarrowingStores(St, DCI.DAG))
16806 return NewToken;
16807 }
16808
16809 if (!ISD::isNormalStore(St))
16810 return SDValue();
16811
16812 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
16813 // ARM stores of arguments in the same cache line.
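// Illustrative sketch: rather than
//   vmov d16, r0, r1
//   vstr d16, [r2]
// this emits two plain i32 stores,
//   str r0, [r2]
//   str r1, [r2, #4]
// (with the operand order swapped on big-endian targets).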
16814 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
16815 StVal.getNode()->hasOneUse()) {
16816 SelectionDAG &DAG = DCI.DAG;
16817 bool isBigEndian = DAG.getDataLayout().isBigEndian();
16818 SDLoc DL(St);
16819 SDValue BasePtr = St->getBasePtr();
16820 SDValue NewST1 = DAG.getStore(
16821 St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
16822 BasePtr, St->getPointerInfo(), St->getOriginalAlign(),
16823 St->getMemOperand()->getFlags());
16824
16825 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
16826 DAG.getConstant(4, DL, MVT::i32));
16827 return DAG.getStore(NewST1.getValue(0), DL,
16828 StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
16829 OffsetPtr, St->getPointerInfo().getWithOffset(4),
16830 St->getOriginalAlign(),
16831 St->getMemOperand()->getFlags());
16832 }
16833
16834 if (StVal.getValueType() == MVT::i64 &&
16835 StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16836
16837 // Bitcast an i64 store extracted from a vector to f64.
16838 // Otherwise, the i64 value will be legalized to a pair of i32 values.
16839 SelectionDAG &DAG = DCI.DAG;
16840 SDLoc dl(StVal);
16841 SDValue IntVec = StVal.getOperand(0);
16842 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
16843 IntVec.getValueType().getVectorNumElements());
16844 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
16845 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
16846 Vec, StVal.getOperand(1));
16847 dl = SDLoc(N);
16848 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
16849 // Make the DAGCombiner fold the bitcasts.
16850 DCI.AddToWorklist(Vec.getNode());
16851 DCI.AddToWorklist(ExtElt.getNode());
16852 DCI.AddToWorklist(V.getNode());
16853 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
16854 St->getPointerInfo(), St->getAlign(),
16855 St->getMemOperand()->getFlags(), St->getAAInfo());
16856 }
16857
16858 // If this is a legal vector store, try to combine it into a VST1_UPD.
16859 if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
16860 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16861 return CombineBaseUpdate(N, DCI);
16862
16863 return SDValue();
16864}
16865
16866/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
16867/// can replace combinations of VMUL and VCVT (floating-point to integer)
16868/// when the VMUL has a constant operand that is a power of 2.
16869///
16870/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
16871/// vmul.f32 d16, d17, d16
16872/// vcvt.s32.f32 d16, d16
16873/// becomes:
16874/// vcvt.s32.f32 d16, d16, #3
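/// At the IR level the matched pattern looks roughly like (illustrative):
///   %m = fmul <4 x float> %x, <float 8.0, float 8.0, float 8.0, float 8.0>
///   %i = fptosi <4 x float> %m to <4 x i32>
/// which is rewritten to a single
///   @llvm.arm.neon.vcvtfp2fxs(<4 x float> %x, i32 3)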
16875 static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
16876 const ARMSubtarget *Subtarget) {
16877 if (!Subtarget->hasNEON())
16878 return SDValue();
16879
16880 SDValue Op = N->getOperand(0);
16881 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
16882 Op.getOpcode() != ISD::FMUL)
16883 return SDValue();
16884
16885 SDValue ConstVec = Op->getOperand(1);
16886 if (!isa<BuildVectorSDNode>(ConstVec))
16887 return SDValue();
16888
16889 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
16890 uint32_t FloatBits = FloatTy.getSizeInBits();
16891 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
16892 uint32_t IntBits = IntTy.getSizeInBits();
16893 unsigned NumLanes = Op.getValueType().getVectorNumElements();
16894 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16895 // These instructions only exist converting from f32 to i32. We can handle
16896 // smaller integers by generating an extra truncate, but larger ones would
16897 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
16898 // these instructions only support v2i32/v4i32 types.
16899 return SDValue();
16900 }
16901
16902 BitVector UndefElements;
16903 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
16904 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16905 if (C == -1 || C == 0 || C > 32)
16906 return SDValue();
16907
16908 SDLoc dl(N);
16909 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
16910 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
16911 Intrinsic::arm_neon_vcvtfp2fxu;
16912 SDValue FixConv = DAG.getNode(
16913 ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
16914 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
16915 DAG.getConstant(C, dl, MVT::i32));
16916
16917 if (IntBits < FloatBits)
16918 FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
16919
16920 return FixConv;
16921}
16922
16923 static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
16924 const ARMSubtarget *Subtarget) {
16925 if (!Subtarget->hasMVEFloatOps())
16926 return SDValue();
16927
16928 // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)
16929 // The second form can be more easily turned into a predicated vadd, and
16930 // possibly combined into a fma to become a predicated vfma.
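// Illustrative sketch, with %c an MVE predicate:
//   fadd %x, (vselect %c, %y, splat(-0.0))
// becomes
//   vselect %c, (fadd %x, %y), %x
// which can then be selected as a predicated vaddt inside a VPT block.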
16931 SDValue Op0 = N->getOperand(0);
16932 SDValue Op1 = N->getOperand(1);
16933 EVT VT = N->getValueType(0);
16934 SDLoc DL(N);
16935
16936 // The identity element for an fadd is -0.0 (or +0.0 when the nsz flag is
16937 // set), which is what these VMOVs represent.
16938 auto isIdentitySplat = [&](SDValue Op, bool NSZ) {
16939 if (Op.getOpcode() != ISD::BITCAST ||
16940 Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
16941 return false;
16942 uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0);
16943 if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
16944 return true;
16945 if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
16946 return true;
16947 return false;
16948 };
16949
16950 if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)
16951 std::swap(Op0, Op1);
16952
16953 if (Op1.getOpcode() != ISD::VSELECT)
16954 return SDValue();
16955
16956 SDNodeFlags FaddFlags = N->getFlags();
16957 bool NSZ = FaddFlags.hasNoSignedZeros();
16958 if (!isIdentitySplat(Op1.getOperand(2), NSZ))
16959 return SDValue();
16960
16961 SDValue FAdd =
16962 DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags);
16963 return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags);
16964}
16965
16966 static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG) {
16967 SDValue LHS = N->getOperand(0);
16968 SDValue RHS = N->getOperand(1);
16969 EVT VT = N->getValueType(0);
16970 SDLoc DL(N);
16971
16972 if (!N->getFlags().hasAllowReassociation())
16973 return SDValue();
16974
16975 // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
16976 auto ReassocComplex = [&](SDValue A, SDValue B) {
16977 if (A.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
16978 return SDValue();
16979 unsigned Opc = A.getConstantOperandVal(0);
16980 if (Opc != Intrinsic::arm_mve_vcmlaq)
16981 return SDValue();
16982 SDValue VCMLA = DAG.getNode(
16983 ISD::INTRINSIC_WO_CHAIN, DL, VT, A.getOperand(0), A.getOperand(1),
16984 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(2), B, N->getFlags()),
16985 A.getOperand(3), A.getOperand(4));
16986 VCMLA->setFlags(A->getFlags());
16987 return VCMLA;
16988 };
16989 if (SDValue R = ReassocComplex(LHS, RHS))
16990 return R;
16991 if (SDValue R = ReassocComplex(RHS, LHS))
16992 return R;
16993
16994 return SDValue();
16995}
16996
16997 static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
16998 const ARMSubtarget *Subtarget) {
16999 if (SDValue S = PerformFAddVSelectCombine(N, DAG, Subtarget))
17000 return S;
17001 if (SDValue S = PerformFADDVCMLACombine(N, DAG))
17002 return S;
17003 return SDValue();
17004}
17005
17006/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
17007/// can replace combinations of VCVT (integer to floating-point) and VDIV
17008/// when the VDIV has a constant operand that is a power of 2.
17009///
17010/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
17011/// vcvt.f32.s32 d16, d16
17012/// vdiv.f32 d16, d17, d16
17013/// becomes:
17014/// vcvt.f32.s32 d16, d16, #3
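/// At the IR level the matched pattern looks roughly like (illustrative):
///   %c = sitofp <4 x i32> %x to <4 x float>
///   %d = fdiv <4 x float> %c, <float 8.0, float 8.0, float 8.0, float 8.0>
/// which is rewritten to a single
///   @llvm.arm.neon.vcvtfxs2fp(<4 x i32> %x, i32 3)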
17015 static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
17016 const ARMSubtarget *Subtarget) {
17017 if (!Subtarget->hasNEON())
17018 return SDValue();
17019
17020 SDValue Op = N->getOperand(0);
17021 unsigned OpOpcode = Op.getNode()->getOpcode();
17022 if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
17023 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
17024 return SDValue();
17025
17026 SDValue ConstVec = N->getOperand(1);
17027 if (!isa<BuildVectorSDNode>(ConstVec))
17028 return SDValue();
17029
17030 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
17031 uint32_t FloatBits = FloatTy.getSizeInBits();
17032 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
17033 uint32_t IntBits = IntTy.getSizeInBits();
17034 unsigned NumLanes = Op.getValueType().getVectorNumElements();
17035 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
17036 // These instructions only exist converting from i32 to f32. We can handle
17037 // smaller integers by generating an extra extend, but larger ones would
17038 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
17039 // these instructions only support v2i32/v4i32 types.
17040 return SDValue();
17041 }
17042
17043 BitVector UndefElements;
17044 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
17045 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
17046 if (C == -1 || C == 0 || C > 32)
17047 return SDValue();
17048
17049 SDLoc dl(N);
17050 bool isSigned = OpOpcode == ISD::SINT_TO_FP;
17051 SDValue ConvInput = Op.getOperand(0);
17052 if (IntBits < FloatBits)
17053 ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
17054 dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
17055 ConvInput);
17056
17057 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
17058 Intrinsic::arm_neon_vcvtfxu2fp;
17059 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
17060 Op.getValueType(),
17061 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
17062 ConvInput, DAG.getConstant(C, dl, MVT::i32));
17063}
17064
17065 static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
17066 const ARMSubtarget *ST) {
17067 if (!ST->hasMVEIntegerOps())
17068 return SDValue();
17069
17070 assert(N->getOpcode() == ISD::VECREDUCE_ADD);
17071 EVT ResVT = N->getValueType(0);
17072 SDValue N0 = N->getOperand(0);
17073 SDLoc dl(N);
17074
17075 // Try to turn vecreduce_add(add(x, y)) into vecreduce(x) + vecreduce(y)
17076 if (ResVT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
17077 (N0.getValueType() == MVT::v4i32 || N0.getValueType() == MVT::v8i16 ||
17078 N0.getValueType() == MVT::v16i8)) {
17079 SDValue Red0 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(0));
17080 SDValue Red1 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(1));
17081 return DAG.getNode(ISD::ADD, dl, ResVT, Red0, Red1);
17082 }
17083
17084 // We are looking for something that will have illegal types if left alone,
17085 // but that we can convert to a single instruction under MVE. For example
17086 // vecreduce_add(sext(A, v8i32)) => VADDV.s16 A
17087 // or
17088 // vecreduce_add(mul(zext(A, v16i32), zext(B, v16i32))) => VMLADAV.u8 A, B
17089
17090 // The legal cases are:
17091 // VADDV u/s 8/16/32
17092 // VMLAV u/s 8/16/32
17093 // VADDLV u/s 32
17094 // VMLALV u/s 16/32
17095
17096 // If the input vector is smaller than legal (v4i8/v4i16 for example) we can
17097 // extend it and use v4i32 instead.
17098 auto ExtTypeMatches = [](SDValue A, ArrayRef<MVT> ExtTypes) {
17099 EVT AVT = A.getValueType();
17100 return any_of(ExtTypes, [&](MVT Ty) {
17101 return AVT.getVectorNumElements() == Ty.getVectorNumElements() &&
17102 AVT.bitsLE(Ty);
17103 });
17104 };
17105 auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
17106 EVT AVT = A.getValueType();
17107 if (!AVT.is128BitVector())
17108 A = DAG.getNode(ExtendCode, dl,
17109 AVT.changeVectorElementType(MVT::getIntegerVT(
17110 128 / AVT.getVectorMinNumElements())),
17111 A);
17112 return A;
17113 };
17114 auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
17115 if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
17116 return SDValue();
17117 SDValue A = N0->getOperand(0);
17118 if (ExtTypeMatches(A, ExtTypes))
17119 return ExtendIfNeeded(A, ExtendCode);
17120 return SDValue();
17121 };
17122 auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
17123 ArrayRef<MVT> ExtTypes, SDValue &Mask) {
17124 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17125 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17126 return SDValue();
17127 Mask = N0->getOperand(0);
17128 SDValue Ext = N0->getOperand(1);
17129 if (Ext->getOpcode() != ExtendCode)
17130 return SDValue();
17131 SDValue A = Ext->getOperand(0);
17132 if (ExtTypeMatches(A, ExtTypes))
17133 return ExtendIfNeeded(A, ExtendCode);
17134 return SDValue();
17135 };
17136 auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17137 SDValue &A, SDValue &B) {
17138 // For a vmla we are trying to match a larger pattern:
17139 // ExtA = sext/zext A
17140 // ExtB = sext/zext B
17141 // Mul = mul ExtA, ExtB
17142 // vecreduce.add Mul
17143 // There might also be an extra extend between the mul and the addreduce, so
17144 // long as the bitwidth is high enough to make them equivalent (for example
17145 // original v8i16 might be mul at v8i32 and the reduce happens at v8i64).
17146 if (ResVT != RetTy)
17147 return false;
17148 SDValue Mul = N0;
17149 if (Mul->getOpcode() == ExtendCode &&
17150 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17151 ResVT.getScalarSizeInBits())
17152 Mul = Mul->getOperand(0);
17153 if (Mul->getOpcode() != ISD::MUL)
17154 return false;
17155 SDValue ExtA = Mul->getOperand(0);
17156 SDValue ExtB = Mul->getOperand(1);
17157 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17158 return false;
17159 A = ExtA->getOperand(0);
17160 B = ExtB->getOperand(0);
17161 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17162 A = ExtendIfNeeded(A, ExtendCode);
17163 B = ExtendIfNeeded(B, ExtendCode);
17164 return true;
17165 }
17166 return false;
17167 };
17168 auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17169 SDValue &A, SDValue &B, SDValue &Mask) {
17170 // Same as the pattern above with a select for the zero predicated lanes
17171 // ExtA = sext/zext A
17172 // ExtB = sext/zext B
17173 // Mul = mul ExtA, ExtB
17174 // N0 = select Mask, Mul, 0
17175 // vecreduce.add N0
17176 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17177 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17178 return false;
17179 Mask = N0->getOperand(0);
17180 SDValue Mul = N0->getOperand(1);
17181 if (Mul->getOpcode() == ExtendCode &&
17182 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17183 ResVT.getScalarSizeInBits())
17184 Mul = Mul->getOperand(0);
17185 if (Mul->getOpcode() != ISD::MUL)
17186 return false;
17187 SDValue ExtA = Mul->getOperand(0);
17188 SDValue ExtB = Mul->getOperand(1);
17189 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17190 return false;
17191 A = ExtA->getOperand(0);
17192 B = ExtB->getOperand(0);
17193 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17194 A = ExtendIfNeeded(A, ExtendCode);
17195 B = ExtendIfNeeded(B, ExtendCode);
17196 return true;
17197 }
17198 return false;
17199 };
17200 auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {
17201 // Split illegal MVT::v16i8->i64 vector reductions into two legal v8i16->i64
17202 // reductions. The operands are extended with MVEEXT, but as they are
17203 // reductions the lane orders do not matter. MVEEXT may be combined with
17204 // loads to produce two extending loads, or else they will be expanded to
17205 // VREV/VMOVL.
17206 EVT VT = Ops[0].getValueType();
17207 if (VT == MVT::v16i8) {
17208 assert((Opcode == ARMISD::VMLALVs || Opcode == ARMISD::VMLALVu) &&
17209 "Unexpected illegal long reduction opcode");
17210 bool IsUnsigned = Opcode == ARMISD::VMLALVu;
17211
17212 SDValue Ext0 =
17213 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17214 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[0]);
17215 SDValue Ext1 =
17216 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17217 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[1]);
17218
17219 SDValue MLA0 = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
17220 Ext0, Ext1);
17221 SDValue MLA1 =
17222 DAG.getNode(IsUnsigned ? ARMISD::VMLALVAu : ARMISD::VMLALVAs, dl,
17223 DAG.getVTList(MVT::i32, MVT::i32), MLA0, MLA0.getValue(1),
17224 Ext0.getValue(1), Ext1.getValue(1));
17225 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, MLA1, MLA1.getValue(1));
17226 }
17227 SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops);
17228 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node,
17229 SDValue(Node.getNode(), 1));
17230 };
17231
17232 SDValue A, B;
17233 SDValue Mask;
17234 if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17235 return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
17236 if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17237 return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
17238 if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17239 A, B))
17240 return Create64bitNode(ARMISD::VMLALVs, {A, B});
17241 if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17242 A, B))
17243 return Create64bitNode(ARMISD::VMLALVu, {A, B});
17244 if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))
17245 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17246 DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
17247 if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))
17248 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17249 DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
17250
17251 if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17252 Mask))
17253 return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
17254 if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17255 Mask))
17256 return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
17257 if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17258 Mask))
17259 return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
17260 if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17261 Mask))
17262 return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
17263 if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))
17264 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17265 DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
17266 if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
17267 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17268 DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
17269
17270 if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
17271 return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
17272 if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
17273 return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
17274 if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
17275 return Create64bitNode(ARMISD::VADDLVs, {A});
17276 if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
17277 return Create64bitNode(ARMISD::VADDLVu, {A});
17278 if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
17279 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17280 DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
17281 if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
17282 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17283 DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
17284
17285 if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17286 return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
17287 if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17288 return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
17289 if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
17290 return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
17291 if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
17292 return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
17293 if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
17294 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17295 DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
17296 if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
17297 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17298 DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
17299
17300 // Some complications. We can get a case where the two inputs of the mul are
17301 // the same, in which case the output sext will have been helpfully converted
17302 // to a zext. Turn it back.
17303 SDValue Op = N0;
17304 if (Op->getOpcode() == ISD::VSELECT)
17305 Op = Op->getOperand(1);
17306 if (Op->getOpcode() == ISD::ZERO_EXTEND &&
17307 Op->getOperand(0)->getOpcode() == ISD::MUL) {
17308 SDValue Mul = Op->getOperand(0);
17309 if (Mul->getOperand(0) == Mul->getOperand(1) &&
17310 Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
17311 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
17312 if (Op != N0)
17313 Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
17314 N0->getOperand(0), Ext, N0->getOperand(2));
17315 return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
17316 }
17317 }
17318
17319 return SDValue();
17320}
17321
17322// Looks for vaddv(shuffle) or vmlav(shuffle, shuffle), with a shuffle where all
17323// the lanes are used. Due to the reduction being commutative the shuffle can be
17324// removed.
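// Illustrative sketch: vaddv(shuffle <3,2,1,0> %a) adds up exactly the same
// four lanes as vaddv(%a), so the shuffle can be dropped as long as its mask
// uses every lane exactly once (and, for vmlav, both operands are shuffled
// with the same mask).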
17325 static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG) {
17326 unsigned VecOp = N->getOperand(0).getValueType().isVector() ? 0 : 2;
17327 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp));
17328 if (!Shuf || !Shuf->getOperand(1).isUndef())
17329 return SDValue();
17330
17331 // Check all elements are used once in the mask.
17332 ArrayRef<int> Mask = Shuf->getMask();
17333 APInt SetElts(Mask.size(), 0);
17334 for (int E : Mask) {
17335 if (E < 0 || E >= (int)Mask.size())
17336 return SDValue();
17337 SetElts.setBit(E);
17338 }
17339 if (!SetElts.isAllOnes())
17340 return SDValue();
17341
17342 if (N->getNumOperands() != VecOp + 1) {
17343 auto *Shuf2 = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp + 1));
17344 if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)
17345 return SDValue();
17346 }
17347
17348 SmallVector<SDValue> Ops;
17349 for (SDValue Op : N->ops()) {
17350 if (Op.getValueType().isVector())
17351 Ops.push_back(Op.getOperand(0));
17352 else
17353 Ops.push_back(Op);
17354 }
17355 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops);
17356}
17357
17358 static SDValue PerformVMOVNCombine(SDNode *N,
17359 TargetLowering::DAGCombinerInfo &DCI) {
17360 SDValue Op0 = N->getOperand(0);
17361 SDValue Op1 = N->getOperand(1);
17362 unsigned IsTop = N->getConstantOperandVal(2);
17363
17364 // VMOVNT a undef -> a
17365 // VMOVNB a undef -> a
17366 // VMOVNB undef a -> a
17367 if (Op1->isUndef())
17368 return Op0;
17369 if (Op0->isUndef() && !IsTop)
17370 return Op1;
17371
17372 // VMOVNt(c, VQMOVNb(a, b)) => VQMOVNt(c, b)
17373 // VMOVNb(c, VQMOVNb(a, b)) => VQMOVNb(c, b)
17374 if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
17375 Op1->getOpcode() == ARMISD::VQMOVNu) &&
17376 Op1->getConstantOperandVal(2) == 0)
17377 return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),
17378 Op0, Op1->getOperand(1), N->getOperand(2));
17379
17380 // Only the bottom lanes from Qm (Op1) and either the top or bottom lanes from
17381 // Qd (Op0) are demanded from a VMOVN, depending on whether we are inserting
17382 // into the top or bottom lanes.
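// Illustrative sketch for VMOVNT (IsTop == 1): it narrows the bottom lane of
// each pair in Qm into the top lane of each pair in Qd, so only the bottom
// lanes of Qm and the bottom (preserved) lanes of Qd remain demanded; the
// other lanes can be simplified away.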
17383 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17384 APInt Op1DemandedElts = APInt::getSplat(NumElts, APInt::getLowBitsSet(2, 1));
17385 APInt Op0DemandedElts =
17386 IsTop ? Op1DemandedElts
17387 : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));
17388
17389 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17390 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17391 return SDValue(N, 0);
17392 if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI))
17393 return SDValue(N, 0);
17394
17395 return SDValue();
17396}
17397
17398 static SDValue PerformVQMOVNCombine(SDNode *N,
17399 TargetLowering::DAGCombinerInfo &DCI) {
17400 SDValue Op0 = N->getOperand(0);
17401 unsigned IsTop = N->getConstantOperandVal(2);
17402
17403 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17404 APInt Op0DemandedElts =
17405 APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
17406 : APInt::getHighBitsSet(2, 1));
17407
17408 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17409 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17410 return SDValue(N, 0);
17411 return SDValue();
17412}
17413
17414 static SDValue PerformVQDMULHCombine(SDNode *N,
17415 TargetLowering::DAGCombinerInfo &DCI) {
17416 EVT VT = N->getValueType(0);
17417 SDValue LHS = N->getOperand(0);
17418 SDValue RHS = N->getOperand(1);
17419
17420 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
17421 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
17422 // Turn VQDMULH(shuffle, shuffle) -> shuffle(VQDMULH)
17423 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
17424 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
17425 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
17426 SDLoc DL(N);
17427 SDValue NewBinOp = DCI.DAG.getNode(N->getOpcode(), DL, VT,
17428 LHS.getOperand(0), RHS.getOperand(0));
17429 SDValue UndefV = LHS.getOperand(1);
17430 return DCI.DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
17431 }
17432 return SDValue();
17433}
17434
17435 static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
17436 SDLoc DL(N);
17437 SDValue Op0 = N->getOperand(0);
17438 SDValue Op1 = N->getOperand(1);
17439
17440 // Turn X << -C -> X >> C and vice versa. The negative shifts can come up from
17441 // uses of the intrinsics.
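// Illustrative sketch: LSLL(lo, hi, -12) is rewritten as LSRL(lo, hi, 12),
// and a shift amount of 0 simply forwards the two input halves unchanged.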
17442 if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
17443 int ShiftAmt = C->getSExtValue();
17444 if (ShiftAmt == 0) {
17445 SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL);
17446 DAG.ReplaceAllUsesWith(N, Merge.getNode());
17447 return SDValue();
17448 }
17449
17450 if (ShiftAmt >= -32 && ShiftAmt < 0) {
17451 unsigned NewOpcode =
17452 N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
17453 SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
17454 DAG.getConstant(-ShiftAmt, DL, MVT::i32));
17455 DAG.ReplaceAllUsesWith(N, NewShift.getNode());
17456 return NewShift;
17457 }
17458 }
17459
17460 return SDValue();
17461}
17462
17463/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
17464 SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
17465 DAGCombinerInfo &DCI) const {
17466 SelectionDAG &DAG = DCI.DAG;
17467 unsigned IntNo = N->getConstantOperandVal(0);
17468 switch (IntNo) {
17469 default:
17470 // Don't do anything for most intrinsics.
17471 break;
17472
17473 // Vector shifts: check for immediate versions and lower them.
17474 // Note: This is done during DAG combining instead of DAG legalizing because
17475 // the build_vectors for 64-bit vector element shift counts are generally
17476 // not legal, and it is hard to see their values after they get legalized to
17477 // loads from a constant pool.
17478 case Intrinsic::arm_neon_vshifts:
17479 case Intrinsic::arm_neon_vshiftu:
17480 case Intrinsic::arm_neon_vrshifts:
17481 case Intrinsic::arm_neon_vrshiftu:
17482 case Intrinsic::arm_neon_vrshiftn:
17483 case Intrinsic::arm_neon_vqshifts:
17484 case Intrinsic::arm_neon_vqshiftu:
17485 case Intrinsic::arm_neon_vqshiftsu:
17486 case Intrinsic::arm_neon_vqshiftns:
17487 case Intrinsic::arm_neon_vqshiftnu:
17488 case Intrinsic::arm_neon_vqshiftnsu:
17489 case Intrinsic::arm_neon_vqrshiftns:
17490 case Intrinsic::arm_neon_vqrshiftnu:
17491 case Intrinsic::arm_neon_vqrshiftnsu: {
17492 EVT VT = N->getOperand(1).getValueType();
17493 int64_t Cnt;
17494 unsigned VShiftOpc = 0;
17495
17496 switch (IntNo) {
17497 case Intrinsic::arm_neon_vshifts:
17498 case Intrinsic::arm_neon_vshiftu:
17499 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
17500 VShiftOpc = ARMISD::VSHLIMM;
17501 break;
17502 }
17503 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
17504 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
17505 : ARMISD::VSHRuIMM);
17506 break;
17507 }
17508 return SDValue();
17509
17510 case Intrinsic::arm_neon_vrshifts:
17511 case Intrinsic::arm_neon_vrshiftu:
17512 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
17513 break;
17514 return SDValue();
17515
17516 case Intrinsic::arm_neon_vqshifts:
17517 case Intrinsic::arm_neon_vqshiftu:
17518 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17519 break;
17520 return SDValue();
17521
17522 case Intrinsic::arm_neon_vqshiftsu:
17523 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17524 break;
17525 llvm_unreachable("invalid shift count for vqshlu intrinsic");
17526
17527 case Intrinsic::arm_neon_vrshiftn:
17528 case Intrinsic::arm_neon_vqshiftns:
17529 case Intrinsic::arm_neon_vqshiftnu:
17530 case Intrinsic::arm_neon_vqshiftnsu:
17531 case Intrinsic::arm_neon_vqrshiftns:
17532 case Intrinsic::arm_neon_vqrshiftnu:
17533 case Intrinsic::arm_neon_vqrshiftnsu:
17534 // Narrowing shifts require an immediate right shift.
17535 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
17536 break;
17537 llvm_unreachable("invalid shift count for narrowing vector shift "
17538 "intrinsic");
17539
17540 default:
17541 llvm_unreachable("unhandled vector shift");
17542 }
17543
17544 switch (IntNo) {
17545 case Intrinsic::arm_neon_vshifts:
17546 case Intrinsic::arm_neon_vshiftu:
17547 // Opcode already set above.
17548 break;
17549 case Intrinsic::arm_neon_vrshifts:
17550 VShiftOpc = ARMISD::VRSHRsIMM;
17551 break;
17552 case Intrinsic::arm_neon_vrshiftu:
17553 VShiftOpc = ARMISD::VRSHRuIMM;
17554 break;
17555 case Intrinsic::arm_neon_vrshiftn:
17556 VShiftOpc = ARMISD::VRSHRNIMM;
17557 break;
17558 case Intrinsic::arm_neon_vqshifts:
17559 VShiftOpc = ARMISD::VQSHLsIMM;
17560 break;
17561 case Intrinsic::arm_neon_vqshiftu:
17562 VShiftOpc = ARMISD::VQSHLuIMM;
17563 break;
17564 case Intrinsic::arm_neon_vqshiftsu:
17565 VShiftOpc = ARMISD::VQSHLsuIMM;
17566 break;
17567 case Intrinsic::arm_neon_vqshiftns:
17568 VShiftOpc = ARMISD::VQSHRNsIMM;
17569 break;
17570 case Intrinsic::arm_neon_vqshiftnu:
17571 VShiftOpc = ARMISD::VQSHRNuIMM;
17572 break;
17573 case Intrinsic::arm_neon_vqshiftnsu:
17574 VShiftOpc = ARMISD::VQSHRNsuIMM;
17575 break;
17576 case Intrinsic::arm_neon_vqrshiftns:
17577 VShiftOpc = ARMISD::VQRSHRNsIMM;
17578 break;
17579 case Intrinsic::arm_neon_vqrshiftnu:
17580 VShiftOpc = ARMISD::VQRSHRNuIMM;
17581 break;
17582 case Intrinsic::arm_neon_vqrshiftnsu:
17583 VShiftOpc = ARMISD::VQRSHRNsuIMM;
17584 break;
17585 }
17586
17587 SDLoc dl(N);
17588 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17589 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
17590 }
17591
17592 case Intrinsic::arm_neon_vshiftins: {
17593 EVT VT = N->getOperand(1).getValueType();
17594 int64_t Cnt;
17595 unsigned VShiftOpc = 0;
17596
17597 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
17598 VShiftOpc = ARMISD::VSLIIMM;
17599 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
17600 VShiftOpc = ARMISD::VSRIIMM;
17601 else {
17602 llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
17603 }
17604
17605 SDLoc dl(N);
17606 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17607 N->getOperand(1), N->getOperand(2),
17608 DAG.getConstant(Cnt, dl, MVT::i32));
17609 }
17610
17611 case Intrinsic::arm_neon_vqrshifts:
17612 case Intrinsic::arm_neon_vqrshiftu:
17613 // No immediate versions of these to check for.
17614 break;
17615
17616 case Intrinsic::arm_mve_vqdmlah:
17617 case Intrinsic::arm_mve_vqdmlash:
17618 case Intrinsic::arm_mve_vqrdmlah:
17619 case Intrinsic::arm_mve_vqrdmlash:
17620 case Intrinsic::arm_mve_vmla_n_predicated:
17621 case Intrinsic::arm_mve_vmlas_n_predicated:
17622 case Intrinsic::arm_mve_vqdmlah_predicated:
17623 case Intrinsic::arm_mve_vqdmlash_predicated:
17624 case Intrinsic::arm_mve_vqrdmlah_predicated:
17625 case Intrinsic::arm_mve_vqrdmlash_predicated: {
17626 // These intrinsics all take an i32 scalar operand which is narrowed to the
17627 // size of a single lane of the vector type they return. So we don't need
17628 // any bits of that operand above that point, which allows us to eliminate
17629 // uxth/sxth.
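// Illustrative sketch: for a v8i16 vqdmlah only the low 16 bits of the i32
// scalar operand are used, so a uxth/sxth (or an 'and' with 0xffff) feeding
// operand 3 is redundant and can be removed via SimplifyDemandedBits.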
17630 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17631 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17632 if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
17633 return SDValue();
17634 break;
17635 }
17636
17637 case Intrinsic::arm_mve_minv:
17638 case Intrinsic::arm_mve_maxv:
17639 case Intrinsic::arm_mve_minav:
17640 case Intrinsic::arm_mve_maxav:
17641 case Intrinsic::arm_mve_minv_predicated:
17642 case Intrinsic::arm_mve_maxv_predicated:
17643 case Intrinsic::arm_mve_minav_predicated:
17644 case Intrinsic::arm_mve_maxav_predicated: {
17645 // These intrinsics all take an i32 scalar operand which is narrowed to the
17646 // size of a single lane of the vector type they take as the other input.
17647 unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17648 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17649 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
17650 return SDValue();
17651 break;
17652 }
17653
17654 case Intrinsic::arm_mve_addv: {
17655 // Turn this intrinsic straight into the appropriate ARMISD::VADDV node,
17656 // which allows PerformADDVecReduce to turn it into VADDLV when possible.
17657 bool Unsigned = N->getConstantOperandVal(2);
17658 unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
17659 return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
17660 }
17661
17662 case Intrinsic::arm_mve_addlv:
17663 case Intrinsic::arm_mve_addlv_predicated: {
17664 // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR
17665 // which recombines the two outputs into an i64
17666 bool Unsigned = N->getConstantOperandVal(2);
17667 unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
17668 (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
17669 (Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
17670
17671 SmallVector<SDValue, 4> Ops;
17672 for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
17673 if (i != 2) // skip the unsigned flag
17674 Ops.push_back(N->getOperand(i));
17675
17676 SDLoc dl(N);
17677 SDValue val = DAG.getNode(Opc, dl, {MVT::i32, MVT::i32}, Ops);
17678 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, val.getValue(0),
17679 val.getValue(1));
17680 }
17681 }
17682
17683 return SDValue();
17684}
17685
17686/// PerformShiftCombine - Checks for immediate versions of vector shifts and
17687/// lowers them. As with the vector shift intrinsics, this is done during DAG
17688/// combining instead of DAG legalizing because the build_vectors for 64-bit
17689/// vector element shift counts are generally not legal, and it is hard to see
17690/// their values after they get legalized to loads from a constant pool.
17691 static SDValue PerformShiftCombine(SDNode *N,
17692 TargetLowering::DAGCombinerInfo &DCI,
17693 const ARMSubtarget *ST) {
17694 SelectionDAG &DAG = DCI.DAG;
17695 EVT VT = N->getValueType(0);
17696
17697 if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
17698 N->getOperand(0)->getOpcode() == ISD::AND &&
17699 N->getOperand(0)->hasOneUse()) {
17700 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
17701 return SDValue();
17702 // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
17703 // usually show up because instcombine prefers to canonicalize it to
17704 // (and (shl x, ShiftAmt), (shl AndMask, ShiftAmt)), but the shift can come
17705 // out of GEP lowering in some cases.
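// Illustrative sketch: with AndMask = 0x3ff (MaskedBits = 22) and ShiftAmt = 2,
//   (shl (and x, 0x3ff), 2)
// becomes
//   (srl (shl x, 22), 20)
// which avoids materializing the mask constant on Thumb1.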
17706 SDValue N0 = N->getOperand(0);
17707 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17708 if (!ShiftAmtNode)
17709 return SDValue();
17710 uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
17711 ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17712 if (!AndMaskNode)
17713 return SDValue();
17714 uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
17715 // Don't transform uxtb/uxth.
17716 if (AndMask == 255 || AndMask == 65535)
17717 return SDValue();
17718 if (isMask_32(AndMask)) {
17719 uint32_t MaskedBits = llvm::countl_zero(AndMask);
17720 if (MaskedBits > ShiftAmt) {
17721 SDLoc DL(N);
17722 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
17723 DAG.getConstant(MaskedBits, DL, MVT::i32));
17724 return DAG.getNode(
17725 ISD::SRL, DL, MVT::i32, SHL,
17726 DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
17727 }
17728 }
17729 }
17730
17731 // Nothing to be done for scalar shifts.
17732 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17733 if (!VT.isVector() || !TLI.isTypeLegal(VT))
17734 return SDValue();
17735 if (ST->hasMVEIntegerOps())
17736 return SDValue();
17737
17738 int64_t Cnt;
17739
17740 switch (N->getOpcode()) {
17741 default: llvm_unreachable("unexpected shift opcode");
17742
17743 case ISD::SHL:
17744 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
17745 SDLoc dl(N);
17746 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
17747 DAG.getConstant(Cnt, dl, MVT::i32));
17748 }
17749 break;
17750
17751 case ISD::SRA:
17752 case ISD::SRL:
17753 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
17754 unsigned VShiftOpc =
17755 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
17756 SDLoc dl(N);
17757 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
17758 DAG.getConstant(Cnt, dl, MVT::i32));
17759 }
17760 }
17761 return SDValue();
17762}
17763
17764 // Look for a sign, zero or fp extend of a larger than legal load. This can be
17765// split into multiple extending loads, which are simpler to deal with than an
17766// arbitrary extend. For fp extends we use an integer extending load and a VCVTL
17767// to convert the type to an f32.
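// Illustrative sketch: a (zext (load <16 x i8>) to <16 x i32>) is split into
// four v4i8->v4i32 zero-extending loads whose results are concatenated back
// together; for f16->f32 the pieces are loaded as integers and widened to f32
// with VCVTL.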
17768 static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
17769 SDValue N0 = N->getOperand(0);
17770 if (N0.getOpcode() != ISD::LOAD)
17771 return SDValue();
17772 LoadSDNode *LD = cast<LoadSDNode>(N0.getNode());
17773 if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
17774 LD->getExtensionType() != ISD::NON_EXTLOAD)
17775 return SDValue();
17776 EVT FromVT = LD->getValueType(0);
17777 EVT ToVT = N->getValueType(0);
17778 if (!ToVT.isVector())
17779 return SDValue();
17780 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
17781 EVT ToEltVT = ToVT.getVectorElementType();
17782 EVT FromEltVT = FromVT.getVectorElementType();
17783
17784 unsigned NumElements = 0;
17785 if (ToEltVT == MVT::i32 && FromEltVT == MVT::i8)
17786 NumElements = 4;
17787 if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)
17788 NumElements = 4;
17789 if (NumElements == 0 ||
17790 (FromEltVT != MVT::f16 && FromVT.getVectorNumElements() == NumElements) ||
17791 FromVT.getVectorNumElements() % NumElements != 0 ||
17792 !isPowerOf2_32(NumElements))
17793 return SDValue();
17794
17795 LLVMContext &C = *DAG.getContext();
17796 SDLoc DL(LD);
17797 // Details about the old load
17798 SDValue Ch = LD->getChain();
17799 SDValue BasePtr = LD->getBasePtr();
17800 Align Alignment = LD->getOriginalAlign();
17801 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
17802 AAMDNodes AAInfo = LD->getAAInfo();
17803
17804 ISD::LoadExtType NewExtType =
17805 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
17806 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
17807 EVT NewFromVT = EVT::getVectorVT(
17808 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
17809 EVT NewToVT = EVT::getVectorVT(
17810 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
17811
17812 SmallVector<SDValue, 4> Loads;
17813 SmallVector<SDValue, 4> Chains;
17814 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
17815 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
17816 SDValue NewPtr =
17817 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
17818
17819 SDValue NewLoad =
17820 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
17821 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17822 Alignment, MMOFlags, AAInfo);
17823 Loads.push_back(NewLoad);
17824 Chains.push_back(SDValue(NewLoad.getNode(), 1));
17825 }
17826
17827 // Float truncs need to be extended with VCVTB's into their floating point types.
17828 if (FromEltVT == MVT::f16) {
17829 SmallVector<SDValue, 4> Extends;
17830
17831 for (unsigned i = 0; i < Loads.size(); i++) {
17832 SDValue LoadBC =
17833 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v8f16, Loads[i]);
17834 SDValue FPExt = DAG.getNode(ARMISD::VCVTL, DL, MVT::v4f32, LoadBC,
17835 DAG.getConstant(0, DL, MVT::i32));
17836 Extends.push_back(FPExt);
17837 }
17838
17839 Loads = Extends;
17840 }
17841
17842 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
17843 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
17844 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Loads);
17845}
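// Editorial illustration (not part of the original source): a scalar model of
// the split performed by PerformSplittingToWideningLoad above. Zero-extending
// all eight i8 lanes in one go is equivalent to extending two adjacent groups
// of four and concatenating the results in order; the helper names are
// invented for this sketch.
#include <array>
#include <cstdint>

static std::array<uint32_t, 8> zextAllAtOnce(const uint8_t *P) {
  std::array<uint32_t, 8> R;
  for (int I = 0; I < 8; ++I) R[I] = P[I];
  return R;
}
static std::array<uint32_t, 8> zextAsTwoHalves(const uint8_t *P) {
  std::array<uint32_t, 8> R;
  for (int I = 0; I < 4; ++I) R[I] = P[I];         // extending load from P
  for (int I = 0; I < 4; ++I) R[4 + I] = P[4 + I]; // extending load from P + 4
  return R;                                        // same lanes, same order
}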
17846
17847/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
17848/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
17849 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
17850 const ARMSubtarget *ST) {
17851 SDValue N0 = N->getOperand(0);
17852
17853 // Check for sign- and zero-extensions of vector extract operations of 8- and
17854 // 16-bit vector elements. NEON and MVE support these directly. They are
17855 // handled during DAG combining because type legalization will promote them
17856 // to 32-bit types and it is messy to recognize the operations after that.
17857 if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
17858 N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17859 SDValue Vec = N0.getOperand(0);
17860 SDValue Lane = N0.getOperand(1);
17861 EVT VT = N->getValueType(0);
17862 EVT EltVT = N0.getValueType();
17863 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17864
17865 if (VT == MVT::i32 &&
17866 (EltVT == MVT::i8 || EltVT == MVT::i16) &&
17867 TLI.isTypeLegal(Vec.getValueType()) &&
17868 isa<ConstantSDNode>(Lane)) {
17869
17870 unsigned Opc = 0;
17871 switch (N->getOpcode()) {
17872 default: llvm_unreachable("unexpected opcode");
17873 case ISD::SIGN_EXTEND:
17874 Opc = ARMISD::VGETLANEs;
17875 break;
17876 case ISD::ZERO_EXTEND:
17877 case ISD::ANY_EXTEND:
17878 Opc = ARMISD::VGETLANEu;
17879 break;
17880 }
17881 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
17882 }
17883 }
17884
17885 if (ST->hasMVEIntegerOps())
17886 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17887 return NewLoad;
17888
17889 return SDValue();
17890}
17891
17892 static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,
17893 const ARMSubtarget *ST) {
17894 if (ST->hasMVEFloatOps())
17895 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17896 return NewLoad;
17897
17898 return SDValue();
17899}
17900
17901// Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating
17902// constant bounds.
17903 static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG,
17904 const ARMSubtarget *Subtarget) {
17905 if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
17906 !Subtarget->isThumb2())
17907 return SDValue();
17908
17909 EVT VT = Op.getValueType();
17910 SDValue Op0 = Op.getOperand(0);
17911
17912 if (VT != MVT::i32 ||
17913 (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) ||
17914 !isa<ConstantSDNode>(Op.getOperand(1)) ||
17915 !isa<ConstantSDNode>(Op0.getOperand(1)))
17916 return SDValue();
17917
17918 SDValue Min = Op;
17919 SDValue Max = Op0;
17920 SDValue Input = Op0.getOperand(0);
17921 if (Min.getOpcode() == ISD::SMAX)
17922 std::swap(Min, Max);
17923
17924 APInt MinC = Min.getConstantOperandAPInt(1);
17925 APInt MaxC = Max.getConstantOperandAPInt(1);
17926
17927 if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX ||
17928 !(MinC + 1).isPowerOf2())
17929 return SDValue();
17930
17931 SDLoc DL(Op);
17932 if (MinC == ~MaxC)
17933 return DAG.getNode(ARMISD::SSAT, DL, VT, Input,
17934 DAG.getConstant(MinC.countr_one(), DL, VT));
17935 if (MaxC == 0)
17936 return DAG.getNode(ARMISD::USAT, DL, VT, Input,
17937 DAG.getConstant(MinC.countr_one(), DL, VT));
17938
17939 return SDValue();
17940}
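// Editorial illustration (not part of the original source): scalar versions of
// the two clamp patterns that PerformMinMaxToSatCombine recognises. With
// MinC = 2^(K-1) - 1 and MaxC = ~MinC the smin/smax pair is a signed
// saturation to K bits (SSAT); with MaxC = 0 it is an unsigned saturation
// (USAT). The K = 8 constants below are example values, not taken from the
// source.
#include <algorithm>
#include <cstdint>

static int32_t ssat8(int32_t X) {           // saturate to [-128, 127]
  return std::clamp(X, ~int32_t(127), int32_t(127));
}
static int32_t usat8(int32_t X) {           // saturate to [0, 255]
  return std::clamp(X, int32_t(0), int32_t(255));
}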
17941
17942/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
17943/// saturates.
17944 static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
17945 const ARMSubtarget *ST) {
17946 EVT VT = N->getValueType(0);
17947 SDValue N0 = N->getOperand(0);
17948
17949 if (VT == MVT::i32)
17950 return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);
17951
17952 if (!ST->hasMVEIntegerOps())
17953 return SDValue();
17954
17955 if (SDValue V = PerformVQDMULHCombine(N, DAG))
17956 return V;
17957
17958 if (VT != MVT::v4i32 && VT != MVT::v8i16)
17959 return SDValue();
17960
17961 auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {
17962 // Check one is a smin and the other is a smax
17963 if (Min->getOpcode() != ISD::SMIN)
17964 std::swap(Min, Max);
17965 if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
17966 return false;
17967
17968 APInt SaturateC;
17969 if (VT == MVT::v4i32)
17970 SaturateC = APInt(32, (1 << 15) - 1, true);
17971 else //if (VT == MVT::v8i16)
17972 SaturateC = APInt(16, (1 << 7) - 1, true);
17973
17974 APInt MinC, MaxC;
17975 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
17976 MinC != SaturateC)
17977 return false;
17978 if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
17979 MaxC != ~SaturateC)
17980 return false;
17981 return true;
17982 };
17983
17984 if (IsSignedSaturate(N, N0.getNode())) {
17985 SDLoc DL(N);
17986 MVT ExtVT, HalfVT;
17987 if (VT == MVT::v4i32) {
17988 HalfVT = MVT::v8i16;
17989 ExtVT = MVT::v4i16;
17990 } else { // if (VT == MVT::v8i16)
17991 HalfVT = MVT::v16i8;
17992 ExtVT = MVT::v8i8;
17993 }
17994
17995 // Create a VQMOVNB with undef top lanes, then sign extended into the top
17996 // half. That extend will hopefully be removed if only the bottom bits are
17997 // demanded (through a truncating store, for example).
17998 SDValue VQMOVN =
17999 DAG.getNode(ARMISD::VQMOVNs, DL, HalfVT, DAG.getUNDEF(HalfVT),
18000 N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
18001 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18002 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Bitcast,
18003 DAG.getValueType(ExtVT));
18004 }
18005
18006 auto IsUnsignedSaturate = [&](SDNode *Min) {
18007 // For unsigned, we just need to check for <= 0xffff
18008 if (Min->getOpcode() != ISD::UMIN)
18009 return false;
18010
18011 APInt SaturateC;
18012 if (VT == MVT::v4i32)
18013 SaturateC = APInt(32, (1 << 16) - 1, true);
18014 else //if (VT == MVT::v8i16)
18015 SaturateC = APInt(16, (1 << 8) - 1, true);
18016
18017 APInt MinC;
18018 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18019 MinC != SaturateC)
18020 return false;
18021 return true;
18022 };
18023
18024 if (IsUnsignedSaturate(N)) {
18025 SDLoc DL(N);
18026 MVT HalfVT;
18027 unsigned ExtConst;
18028 if (VT == MVT::v4i32) {
18029 HalfVT = MVT::v8i16;
18030 ExtConst = 0x0000FFFF;
18031 } else { //if (VT == MVT::v8i16)
18032 HalfVT = MVT::v16i8;
18033 ExtConst = 0x00FF;
18034 }
18035
18036 // Create a VQMOVNB with undef top lanes, then ZExt into the top half with
18037 // an AND. That extend will hopefully be removed if only the bottom bits are
18038 // demanded (through a truncating store, for example).
18039 SDValue VQMOVN =
18040 DAG.getNode(ARMISD::VQMOVNu, DL, HalfVT, DAG.getUNDEF(HalfVT), N0,
18041 DAG.getConstant(0, DL, MVT::i32));
18042 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18043 return DAG.getNode(ISD::AND, DL, VT, Bitcast,
18044 DAG.getConstant(ExtConst, DL, VT));
18045 }
18046
18047 return SDValue();
18048}
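// Editorial illustration (not part of the original source): a per-lane scalar
// model of the signed-saturate case handled above for v4i32. The smin/smax
// pair clamps each i32 lane to the i16 range, VQMOVNs keeps that value in the
// low 16 bits of the lane, and the SIGN_EXTEND_INREG emitted by the combine
// reconstructs the full lane, so per lane the whole sequence is just a clamp.
#include <algorithm>
#include <cstdint>

static int32_t qmovnsLane(int32_t X) {
  int32_t Sat = std::clamp(X, -32768, 32767); // what the smin/smax pair computes
  int16_t Low = static_cast<int16_t>(Sat);    // narrowed value in the low half
  return static_cast<int32_t>(Low);           // SIGN_EXTEND_INREG(i16) == Sat
}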
18049
18050 static const APInt *isPowerOf2Constant(SDValue V) {
18051 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
18052 if (!C)
18053 return nullptr;
18054 const APInt *CV = &C->getAPIntValue();
18055 return CV->isPowerOf2() ? CV : nullptr;
18056}
18057
18058 SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
18059 // If we have a CMOV, OR and AND combination such as:
18060 // if (x & CN)
18061 // y |= CM;
18062 //
18063 // And:
18064 // * CN is a single bit;
18065 // * All bits covered by CM are known zero in y
18066 //
18067 // Then we can convert this into a sequence of BFI instructions. This will
18068 // always be a win if CM is a single bit, will always be no worse than the
18069 // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
18070 // three bits (due to the extra IT instruction).
18071
18072 SDValue Op0 = CMOV->getOperand(0);
18073 SDValue Op1 = CMOV->getOperand(1);
18074 auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
18075 SDValue CmpZ = CMOV->getOperand(4);
18076
18077 // The compare must be against zero.
18078 if (!isNullConstant(CmpZ->getOperand(1)))
18079 return SDValue();
18080
18081 assert(CmpZ->getOpcode() == ARMISD::CMPZ);
18082 SDValue And = CmpZ->getOperand(0);
18083 if (And->getOpcode() != ISD::AND)
18084 return SDValue();
18085 const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
18086 if (!AndC)
18087 return SDValue();
18088 SDValue X = And->getOperand(0);
18089
18090 if (CC == ARMCC::EQ) {
18091 // We're performing an "equal to zero" compare. Swap the operands so we
18092 // canonicalize on a "not equal to zero" compare.
18093 std::swap(Op0, Op1);
18094 } else {
18095 assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
18096 }
18097
18098 if (Op1->getOpcode() != ISD::OR)
18099 return SDValue();
18100
18101 ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
18102 if (!OrC)
18103 return SDValue();
18104 SDValue Y = Op1->getOperand(0);
18105
18106 if (Op0 != Y)
18107 return SDValue();
18108
18109 // Now, is it profitable to continue?
18110 APInt OrCI = OrC->getAPIntValue();
18111 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
18112 if (OrCI.popcount() > Heuristic)
18113 return SDValue();
18114
18115 // Lastly, can we determine that the bits defined by OrCI
18116 // are zero in Y?
18117 KnownBits Known = DAG.computeKnownBits(Y);
18118 if ((OrCI & Known.Zero) != OrCI)
18119 return SDValue();
18120
18121 // OK, we can do the combine.
18122 SDValue V = Y;
18123 SDLoc dl(X);
18124 EVT VT = X.getValueType();
18125 unsigned BitInX = AndC->logBase2();
18126
18127 if (BitInX != 0) {
18128 // We must shift X first.
18129 X = DAG.getNode(ISD::SRL, dl, VT, X,
18130 DAG.getConstant(BitInX, dl, VT));
18131 }
18132
18133 for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
18134 BitInY < NumActiveBits; ++BitInY) {
18135 if (OrCI[BitInY] == 0)
18136 continue;
18137 APInt Mask(VT.getSizeInBits(), 0);
18138 Mask.setBit(BitInY);
18139 V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
18140 // Confusingly, the operand is an *inverted* mask.
18141 DAG.getConstant(~Mask, dl, VT));
18142 }
18143
18144 return V;
18145}
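// Editorial illustration (not part of the original source): a scalar model of
// the CMOV -> BFI rewrite above. Given a single-bit CN and a CM whose bits are
// known to be zero in Y, conditionally OR-ing in CM is the same as inserting
// the tested bit of X at each set bit position of CM (one BFI per bit). The
// helper names are invented for this sketch.
#include <cstdint>

static uint32_t condOr(uint32_t X, uint32_t Y, uint32_t CN, uint32_t CM) {
  return (X & CN) ? (Y | CM) : Y;
}
static uint32_t viaBitfieldInserts(uint32_t X, uint32_t Y, unsigned BitInX,
                                   uint32_t CM) {
  uint32_t Bit = (X >> BitInX) & 1;      // the shifted X from the combine
  uint32_t V = Y;
  for (unsigned I = 0; I < 32; ++I)
    if (CM & (1u << I))
      V = (V & ~(1u << I)) | (Bit << I); // one BFI into a known-zero bit of Y
  return V;                              // equals condOr when (Y & CM) == 0
}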
18146
18147// Given N, the value controlling the conditional branch, search for the loop
18148// intrinsic, returning it, along with how the value is used. We need to handle
18149// patterns such as the following:
18150// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)
18151// (brcond (setcc (loop.decrement), 0, eq), exit)
18152// (brcond (setcc (loop.decrement), 0, ne), header)
18153 static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
18154 bool &Negate) {
18155 switch (N->getOpcode()) {
18156 default:
18157 break;
18158 case ISD::XOR: {
18159 if (!isa<ConstantSDNode>(N.getOperand(1)))
18160 return SDValue();
18161 if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
18162 return SDValue();
18163 Negate = !Negate;
18164 return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);
18165 }
18166 case ISD::SETCC: {
18167 auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
18168 if (!Const)
18169 return SDValue();
18170 if (Const->isZero())
18171 Imm = 0;
18172 else if (Const->isOne())
18173 Imm = 1;
18174 else
18175 return SDValue();
18176 CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
18177 return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
18178 }
18179 case ISD::INTRINSIC_W_CHAIN: {
18180 unsigned IntOp = N.getConstantOperandVal(1);
18181 if (IntOp != Intrinsic::test_start_loop_iterations &&
18182 IntOp != Intrinsic::loop_decrement_reg)
18183 return SDValue();
18184 return N;
18185 }
18186 }
18187 return SDValue();
18188}
18189
18190 static SDValue PerformHWLoopCombine(SDNode *N,
18191 TargetLowering::DAGCombinerInfo &DCI,
18192 const ARMSubtarget *ST) {
18193
18194 // The hwloop intrinsics that we're interested in are used for control-flow,
18195 // either for entering or exiting the loop:
18196 // - test.start.loop.iterations will test whether its operand is zero. If it
18197 // is zero, the branch that follows should not enter the loop.
18198 // - loop.decrement.reg also tests whether its operand is zero. If it is
18199 // zero, the branch that follows should not branch back to the beginning of
18200 // the loop.
18201 // So here, we need to check how the brcond is using the result of each of
18202 // the intrinsics to ensure that we're branching to the right place at the
18203 // right time.
18204
18205 ISD::CondCode CC = ISD::SETEQ;
18206 SDValue Cond;
18207 int Imm = 1;
18208 bool Negate = false;
18209 SDValue Chain = N->getOperand(0);
18210 SDValue Dest;
18211
18212 if (N->getOpcode() == ISD::BRCOND) {
18213 CC = ISD::SETEQ;
18214 Cond = N->getOperand(1);
18215 Dest = N->getOperand(2);
18216 } else {
18217 assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
18218 CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18219 Cond = N->getOperand(2);
18220 Dest = N->getOperand(4);
18221 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
18222 if (!Const->isOne() && !Const->isZero())
18223 return SDValue();
18224 Imm = Const->getZExtValue();
18225 } else
18226 return SDValue();
18227 }
18228
18229 SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);
18230 if (!Int)
18231 return SDValue();
18232
18233 if (Negate)
18234 CC = ISD::getSetCCInverse(CC, /* Integer inverse */ MVT::i32);
18235
18236 auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
18237 return (CC == ISD::SETEQ && Imm == 0) ||
18238 (CC == ISD::SETNE && Imm == 1) ||
18239 (CC == ISD::SETLT && Imm == 1) ||
18240 (CC == ISD::SETULT && Imm == 1);
18241 };
18242
18243 auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {
18244 return (CC == ISD::SETEQ && Imm == 1) ||
18245 (CC == ISD::SETNE && Imm == 0) ||
18246 (CC == ISD::SETGT && Imm == 0) ||
18247 (CC == ISD::SETUGT && Imm == 0) ||
18248 (CC == ISD::SETGE && Imm == 1) ||
18249 (CC == ISD::SETUGE && Imm == 1);
18250 };
18251
18252 assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
18253 "unsupported condition");
18254
18255 SDLoc dl(Int);
18256 SelectionDAG &DAG = DCI.DAG;
18257 SDValue Elements = Int.getOperand(2);
18258 unsigned IntOp = Int->getConstantOperandVal(1);
18259 assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR)
18260 && "expected single br user");
18261 SDNode *Br = *N->use_begin();
18262 SDValue OtherTarget = Br->getOperand(1);
18263
18264 // Update the unconditional branch to branch to the given Dest.
18265 auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {
18266 SDValue NewBrOps[] = { Br->getOperand(0), Dest };
18267 SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);
18268 DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);
18269 };
18270
18271 if (IntOp == Intrinsic::test_start_loop_iterations) {
18272 SDValue Res;
18273 SDValue Setup = DAG.getNode(ARMISD::WLSSETUP, dl, MVT::i32, Elements);
18274 // We expect this 'instruction' to branch when the counter is zero.
18275 if (IsTrueIfZero(CC, Imm)) {
18276 SDValue Ops[] = {Chain, Setup, Dest};
18277 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18278 } else {
18279 // The logic is the reverse of what we need for WLS, so find the other
18280 // basic block target: the target of the following br.
18281 UpdateUncondBr(Br, Dest, DAG);
18282
18283 SDValue Ops[] = {Chain, Setup, OtherTarget};
18284 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18285 }
18286 // Update LR count to the new value
18287 DAG.ReplaceAllUsesOfValueWith(Int.getValue(0), Setup);
18288 // Update chain
18289 DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0));
18290 return Res;
18291 } else {
18292 SDValue Size =
18293 DAG.getTargetConstant(Int.getConstantOperandVal(3), dl, MVT::i32);
18294 SDValue Args[] = { Int.getOperand(0), Elements, Size, };
18295 SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
18296 DAG.getVTList(MVT::i32, MVT::Other), Args);
18297 DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());
18298
18299 // We expect this instruction to branch when the count is not zero.
18300 SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
18301
18302 // Update the unconditional branch to target the loop preheader if we've
18303 // found the condition has been reversed.
18304 if (Target == OtherTarget)
18305 UpdateUncondBr(Br, Dest, DAG);
18306
18307 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18308 SDValue(LoopDec.getNode(), 1), Chain);
18309
18310 SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };
18311 return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
18312 }
18313 return SDValue();
18314}
18315
18316/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
18317SDValue
18318 ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
18319 SDValue Cmp = N->getOperand(4);
18320 if (Cmp.getOpcode() != ARMISD::CMPZ)
18321 // Only looking at NE cases.
18322 return SDValue();
18323
18324 EVT VT = N->getValueType(0);
18325 SDLoc dl(N);
18326 SDValue LHS = Cmp.getOperand(0);
18327 SDValue RHS = Cmp.getOperand(1);
18328 SDValue Chain = N->getOperand(0);
18329 SDValue BB = N->getOperand(1);
18330 SDValue ARMcc = N->getOperand(2);
18331 ARMCC::CondCodes CC = (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
18332
18333 // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
18334 // -> (brcond Chain BB CC CPSR Cmp)
18335 if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
18336 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
18337 LHS->getOperand(0)->hasOneUse() &&
18338 isNullConstant(LHS->getOperand(0)->getOperand(0)) &&
18339 isOneConstant(LHS->getOperand(0)->getOperand(1)) &&
18340 isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {
18341 return DAG.getNode(
18342 ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
18343 LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
18344 }
18345
18346 return SDValue();
18347}
18348
18349/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
18350SDValue
18351 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
18352 SDValue Cmp = N->getOperand(4);
18353 if (Cmp.getOpcode() != ARMISD::CMPZ)
18354 // Only looking at EQ and NE cases.
18355 return SDValue();
18356
18357 EVT VT = N->getValueType(0);
18358 SDLoc dl(N);
18359 SDValue LHS = Cmp.getOperand(0);
18360 SDValue RHS = Cmp.getOperand(1);
18361 SDValue FalseVal = N->getOperand(0);
18362 SDValue TrueVal = N->getOperand(1);
18363 SDValue ARMcc = N->getOperand(2);
18364 ARMCC::CondCodes CC = (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
18365
18366 // BFI is only available on V6T2+.
18367 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
18368 SDValue R = PerformCMOVToBFICombine(N, DAG);
18369 if (R)
18370 return R;
18371 }
18372
18373 // Simplify
18374 // mov r1, r0
18375 // cmp r1, x
18376 // mov r0, y
18377 // moveq r0, x
18378 // to
18379 // cmp r0, x
18380 // movne r0, y
18381 //
18382 // mov r1, r0
18383 // cmp r1, x
18384 // mov r0, x
18385 // movne r0, y
18386 // to
18387 // cmp r0, x
18388 // movne r0, y
18389 /// FIXME: Turn this into a target neutral optimization?
18390 SDValue Res;
18391 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
18392 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
18393 N->getOperand(3), Cmp);
18394 } else if (CC == ARMCC::EQ && TrueVal == RHS) {
18395 SDValue ARMcc;
18396 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
18397 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
18398 N->getOperand(3), NewCmp);
18399 }
18400
18401 // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
18402 // -> (cmov F T CC CPSR Cmp)
18403 if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
18404 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
18405 isNullConstant(RHS)) {
18406 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
18407 LHS->getOperand(2), LHS->getOperand(3),
18408 LHS->getOperand(4));
18409 }
18410
18411 if (!VT.isInteger())
18412 return SDValue();
18413
18414 // Fold away an unnecessary CMPZ/CMOV
18415 // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) ->
18416 // if C1==EQ -> CMOV A, B, C2, $cpsr, D
18417 // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D
18418 if (N->getConstantOperandVal(2) == ARMCC::EQ ||
18419 N->getConstantOperandVal(2) == ARMCC::NE) {
18420 ARMCC::CondCodes Cond;
18421 if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) {
18422 if (N->getConstantOperandVal(2) == ARMCC::NE)
18423 Cond = ARMCC::getOppositeCondition(Cond);
18424 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
18425 N->getOperand(1),
18426 DAG.getTargetConstant(Cond, SDLoc(N), MVT::i32),
18427 N->getOperand(3), C);
18428 }
18429 }
18430
18431 // Materialize a boolean comparison for integers so we can avoid branching.
18432 if (isNullConstant(FalseVal)) {
18433 if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
18434 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
18435 // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it
18436 // right 5 bits will make that 32 be 1, otherwise it will be 0.
18437 // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
18438 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18439 Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
18440 DAG.getConstant(5, dl, MVT::i32));
18441 } else {
18442 // CMOV 0, 1, ==, (CMPZ x, y) ->
18443 // (UADDO_CARRY (SUB x, y), t:0, t:1)
18444 // where t = (USUBO_CARRY 0, (SUB x, y), 0)
18445 //
18446 // The USUBO_CARRY computes 0 - (x - y) and this will give a borrow when
18447 // x != y. In other words, a carry C == 1 when x == y, C == 0
18448 // otherwise.
18449 // The final UADDO_CARRY computes
18450 // x - y + (0 - (x - y)) + C == C
18451 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18452 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18453 SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
18454 // ISD::USUBO_CARRY returns a borrow, but we actually want the carry
18455 // here.
18456 SDValue Carry =
18457 DAG.getNode(ISD::SUB, dl, MVT::i32,
18458 DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
18459 Res = DAG.getNode(ISD::UADDO_CARRY, dl, VTs, Sub, Neg, Carry);
18460 }
18461 } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
18462 (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
18463 // This seems pointless but will allow us to combine it further below.
18464 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
18465 SDValue Sub =
18466 DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18467 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
18468 Sub.getValue(1), SDValue());
18469 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
18470 N->getOperand(3), CPSRGlue.getValue(1));
18471 FalseVal = Sub;
18472 }
18473 } else if (isNullConstant(TrueVal)) {
18474 if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
18475 (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
18476 // This seems pointless but will allow us to combine it further below.
18477 // Note that we change == to != as this is the dual of the case above.
18478 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
18479 SDValue Sub =
18480 DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18481 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
18482 Sub.getValue(1), SDValue());
18483 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
18484 DAG.getConstant(ARMCC::NE, dl, MVT::i32),
18485 N->getOperand(3), CPSRGlue.getValue(1));
18486 FalseVal = Sub;
18487 }
18488 }
18489
18490 // On Thumb1, the DAG above may be further combined if z is a power of 2
18491 // (z == 2 ^ K).
18492 // CMOV (SUBS x, y), z, !=, (SUBS x, y):1 ->
18493 // t1 = (USUBO (SUB x, y), 1)
18494 // t2 = (USUBO_CARRY (SUB x, y), t1:0, t1:1)
18495 // Result = if K != 0 then (SHL t2:0, K) else t2:0
18496 //
18497 // This also handles the special case of comparing against zero; it's
18498 // essentially, the same pattern, except there's no SUBS:
18499 // CMOV x, z, !=, (CMPZ x, 0) ->
18500 // t1 = (USUBO x, 1)
18501 // t2 = (USUBO_CARRY x, t1:0, t1:1)
18502 // Result = if K != 0 then (SHL t2:0, K) else t2:0
18503 const APInt *TrueConst;
18504 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
18505 ((FalseVal.getOpcode() == ARMISD::SUBS &&
18506 FalseVal.getOperand(0) == LHS && FalseVal.getOperand(1) == RHS) ||
18507 (FalseVal == LHS && isNullConstant(RHS))) &&
18508 (TrueConst = isPowerOf2Constant(TrueVal))) {
18509 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18510 unsigned ShiftAmount = TrueConst->logBase2();
18511 if (ShiftAmount)
18512 TrueVal = DAG.getConstant(1, dl, VT);
18513 SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
18514 Res = DAG.getNode(ISD::USUBO_CARRY, dl, VTs, FalseVal, Subc,
18515 Subc.getValue(1));
18516
18517 if (ShiftAmount)
18518 Res = DAG.getNode(ISD::SHL, dl, VT, Res,
18519 DAG.getConstant(ShiftAmount, dl, MVT::i32));
18520 }
18521
18522 if (Res.getNode()) {
18523 KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
18524 // Capture demanded bits information that would be otherwise lost.
18525 if (Known.Zero == 0xfffffffe)
18526 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18527 DAG.getValueType(MVT::i1));
18528 else if (Known.Zero == 0xffffff00)
18529 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18530 DAG.getValueType(MVT::i8));
18531 else if (Known.Zero == 0xffff0000)
18532 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18533 DAG.getValueType(MVT::i16));
18534 }
18535
18536 return Res;
18537}
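// Editorial illustration (not part of the original source): a scalar check of
// the "CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5" identity used
// in the function above. countl_zero(0) is 32, so the shift by 5 produces 1
// exactly when x == y and 0 otherwise; the helper name is invented here.
#include <bit>
#include <cstdint>

static uint32_t isEqualViaClz(uint32_t X, uint32_t Y) {
  return static_cast<uint32_t>(std::countl_zero(X - Y)) >> 5; // 1 iff X == Y
}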
18538
18539 static SDValue PerformBITCASTCombine(SDNode *N,
18540 TargetLowering::DAGCombinerInfo &DCI,
18541 const ARMSubtarget *ST) {
18542 SelectionDAG &DAG = DCI.DAG;
18543 SDValue Src = N->getOperand(0);
18544 EVT DstVT = N->getValueType(0);
18545
18546 // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
18547 if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
18548 EVT SrcVT = Src.getValueType();
18549 if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())
18550 return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
18551 }
18552
18553 // We may have a bitcast of something that has already had this bitcast
18554 // combine performed on it, so skip past any VECTOR_REG_CASTs.
18555 while (Src.getOpcode() == ARMISD::VECTOR_REG_CAST)
18556 Src = Src.getOperand(0);
18557
18558 // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
18559 // would be generated is at least the width of the element type.
18560 EVT SrcVT = Src.getValueType();
18561 if ((Src.getOpcode() == ARMISD::VMOVIMM ||
18562 Src.getOpcode() == ARMISD::VMVNIMM ||
18563 Src.getOpcode() == ARMISD::VMOVFPIMM) &&
18564 SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() &&
18565 DAG.getDataLayout().isBigEndian())
18566 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
18567
18568 // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD x
18569 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
18570 return R;
18571
18572 return SDValue();
18573}
18574
18575// Some combines for the MVETrunc truncations legalizer helper. Also lowers the
18576// node into stack operations after legalizeOps.
18577 static SDValue PerformMVETruncCombine(SDNode *N,
18578 TargetLowering::DAGCombinerInfo &DCI) {
18579 SelectionDAG &DAG = DCI.DAG;
18580 EVT VT = N->getValueType(0);
18581 SDLoc DL(N);
18582
18583 // MVETrunc(Undef, Undef) -> Undef
18584 if (all_of(N->ops(), [](SDValue Op) { return Op.isUndef(); }))
18585 return DAG.getUNDEF(VT);
18586
18587 // MVETrunc(MVETrunc a b, MVETrunc c, d) -> MVETrunc
18588 if (N->getNumOperands() == 2 &&
18589 N->getOperand(0).getOpcode() == ARMISD::MVETRUNC &&
18590 N->getOperand(1).getOpcode() == ARMISD::MVETRUNC)
18591 return DAG.getNode(ARMISD::MVETRUNC, DL, VT, N->getOperand(0).getOperand(0),
18592 N->getOperand(0).getOperand(1),
18593 N->getOperand(1).getOperand(0),
18594 N->getOperand(1).getOperand(1));
18595
18596 // MVETrunc(shuffle, shuffle) -> VMOVN
18597 if (N->getNumOperands() == 2 &&
18598 N->getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
18599 N->getOperand(1).getOpcode() == ISD::VECTOR_SHUFFLE) {
18600 auto *S0 = cast<ShuffleVectorSDNode>(N->getOperand(0).getNode());
18601 auto *S1 = cast<ShuffleVectorSDNode>(N->getOperand(1).getNode());
18602
18603 if (S0->getOperand(0) == S1->getOperand(0) &&
18604 S0->getOperand(1) == S1->getOperand(1)) {
18605 // Construct complete shuffle mask
18606 SmallVector<int, 8> Mask(S0->getMask());
18607 Mask.append(S1->getMask().begin(), S1->getMask().end());
18608
18609 if (isVMOVNTruncMask(Mask, VT, false))
18610 return DAG.getNode(
18611 ARMISD::VMOVN, DL, VT,
18612 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18613 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18614 DAG.getConstant(1, DL, MVT::i32));
18615 if (isVMOVNTruncMask(Mask, VT, true))
18616 return DAG.getNode(
18617 ARMISD::VMOVN, DL, VT,
18618 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18619 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18620 DAG.getConstant(1, DL, MVT::i32));
18621 }
18622 }
18623
18624 // For MVETrunc of a buildvector or shuffle, it can be beneficial to lower the
18625 // truncate to a buildvector to allow the generic optimisations to kick in.
18626 if (all_of(N->ops(), [](SDValue Op) {
18627 return Op.getOpcode() == ISD::BUILD_VECTOR ||
18628 Op.getOpcode() == ISD::VECTOR_SHUFFLE ||
18629 (Op.getOpcode() == ISD::BITCAST &&
18630 Op.getOperand(0).getOpcode() == ISD::BUILD_VECTOR);
18631 })) {
18632 SmallVector<SDValue, 8> Extracts;
18633 for (unsigned Op = 0; Op < N->getNumOperands(); Op++) {
18634 SDValue O = N->getOperand(Op);
18635 for (unsigned i = 0; i < O.getValueType().getVectorNumElements(); i++) {
18636 SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, O,
18637 DAG.getConstant(i, DL, MVT::i32));
18638 Extracts.push_back(Ext);
18639 }
18640 }
18641 return DAG.getBuildVector(VT, DL, Extracts);
18642 }
18643
18644 // If we are late in the legalization process and nothing has optimised
18645 // the trunc to anything better, lower it to a stack store and reload,
18646 // performing the truncation whilst keeping the lanes in the correct order:
18647 // VSTRH.32 a, stack; VSTRH.32 b, stack+8; VLDRW.32 stack;
18648 if (!DCI.isAfterLegalizeDAG())
18649 return SDValue();
18650
18651 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18652 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18653 int NumIns = N->getNumOperands();
18654 assert((NumIns == 2 || NumIns == 4) &&
18655 "Expected 2 or 4 inputs to an MVETrunc");
18656 EVT StoreVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18657 if (N->getNumOperands() == 4)
18658 StoreVT = StoreVT.getHalfNumVectorElementsVT(*DAG.getContext());
18659
18660 SmallVector<SDValue> Chains;
18661 for (int I = 0; I < NumIns; I++) {
18662 SDValue Ptr = DAG.getNode(
18663 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18664 DAG.getConstant(I * 16 / NumIns, DL, StackPtr.getValueType()));
18665 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18666 DAG.getMachineFunction(), SPFI, I * 16 / NumIns);
18667 SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), DL, N->getOperand(I),
18668 Ptr, MPI, StoreVT, Align(4));
18669 Chains.push_back(Ch);
18670 }
18671
18672 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18673 MachinePointerInfo MPI =
18674 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18675 return DAG.getLoad(VT, DL, Chain, StackPtr, MPI, Align(4));
18676}
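// Editorial illustration (not part of the original source): a scalar model of
// the stack-based lowering above for a v8i16 MVETRUNC of two v4i32 inputs.
// Each input is truncate-stored into its half of a 16-byte slot and the result
// is read back as one contiguous vector, so the lane order of the
// concatenation is preserved. The helper name is invented for this sketch.
#include <array>
#include <cstdint>
#include <cstring>

static std::array<uint16_t, 8> mveTruncModel(const std::array<uint32_t, 4> &A,
                                             const std::array<uint32_t, 4> &B) {
  alignas(4) uint8_t Stack[16];
  for (int I = 0; I < 4; ++I) {              // VSTRH.32 a, [stack]
    uint16_t T = static_cast<uint16_t>(A[I]);
    std::memcpy(Stack + 2 * I, &T, 2);
  }
  for (int I = 0; I < 4; ++I) {              // VSTRH.32 b, [stack, #8]
    uint16_t T = static_cast<uint16_t>(B[I]);
    std::memcpy(Stack + 8 + 2 * I, &T, 2);
  }
  std::array<uint16_t, 8> R;                 // VLDRW.32 [stack]
  std::memcpy(R.data(), Stack, 16);
  return R;
}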
18677
18678// Take a MVEEXT(load x) and split that into (extload x, extload x+8)
18679 static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N,
18680 SelectionDAG &DAG) {
18681 SDValue N0 = N->getOperand(0);
18682 LoadSDNode *LD = dyn_cast<LoadSDNode>(N0.getNode());
18683 if (!LD || !LD->isSimple() || !N0.hasOneUse() || LD->isIndexed())
18684 return SDValue();
18685
18686 EVT FromVT = LD->getMemoryVT();
18687 EVT ToVT = N->getValueType(0);
18688 if (!ToVT.isVector())
18689 return SDValue();
18690 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements() * 2);
18691 EVT ToEltVT = ToVT.getVectorElementType();
18692 EVT FromEltVT = FromVT.getVectorElementType();
18693
18694 unsigned NumElements = 0;
18695 if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
18696 NumElements = 4;
18697 if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
18698 NumElements = 8;
18699 assert(NumElements != 0);
18700
18701 ISD::LoadExtType NewExtType =
18702 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
18703 if (LD->getExtensionType() != ISD::NON_EXTLOAD &&
18704 LD->getExtensionType() != ISD::EXTLOAD &&
18705 LD->getExtensionType() != NewExtType)
18706 return SDValue();
18707
18708 LLVMContext &C = *DAG.getContext();
18709 SDLoc DL(LD);
18710 // Details about the old load
18711 SDValue Ch = LD->getChain();
18712 SDValue BasePtr = LD->getBasePtr();
18713 Align Alignment = LD->getOriginalAlign();
18714 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
18715 AAMDNodes AAInfo = LD->getAAInfo();
18716
18717 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
18718 EVT NewFromVT = EVT::getVectorVT(
18719 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
18720 EVT NewToVT = EVT::getVectorVT(
18721 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
18722
18723 SmallVector<SDValue, 4> Loads;
18724 SmallVector<SDValue, 4> Chains;
18725 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
18726 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
18727 SDValue NewPtr =
18728 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
18729
18730 SDValue NewLoad =
18731 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
18732 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18733 Alignment, MMOFlags, AAInfo);
18734 Loads.push_back(NewLoad);
18735 Chains.push_back(SDValue(NewLoad.getNode(), 1));
18736 }
18737
18738 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18739 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
18740 return DAG.getMergeValues(Loads, DL);
18741}
18742
18743 // Perform combines for MVEEXT. If it has not been optimized to anything better
18744 // before lowering, it gets converted to a stack store and extloads performing
18745 // the extend whilst still keeping the same lane ordering.
18746 static SDValue PerformMVEExtCombine(SDNode *N,
18747 TargetLowering::DAGCombinerInfo &DCI) {
18748 SelectionDAG &DAG = DCI.DAG;
18749 EVT VT = N->getValueType(0);
18750 SDLoc DL(N);
18751 assert(N->getNumValues() == 2 && "Expected MVEEXT with 2 elements");
18752 assert((VT == MVT::v4i32 || VT == MVT::v8i16) && "Unexpected MVEEXT type");
18753
18754 EVT ExtVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18755 *DAG.getContext());
18756 auto Extend = [&](SDValue V) {
18757 SDValue VVT = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, V);
18758 return N->getOpcode() == ARMISD::MVESEXT
18759 ? DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, VVT,
18760 DAG.getValueType(ExtVT))
18761 : DAG.getZeroExtendInReg(VVT, DL, ExtVT);
18762 };
18763
18764 // MVEEXT(VDUP) -> SIGN_EXTEND_INREG(VDUP)
18765 if (N->getOperand(0).getOpcode() == ARMISD::VDUP) {
18766 SDValue Ext = Extend(N->getOperand(0));
18767 return DAG.getMergeValues({Ext, Ext}, DL);
18768 }
18769
18770 // MVEEXT(shuffle) -> SIGN_EXTEND_INREG/ZERO_EXTEND_INREG
18771 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0))) {
18772 ArrayRef<int> Mask = SVN->getMask();
18773 assert(Mask.size() == 2 * VT.getVectorNumElements());
18774 assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());
18775 unsigned Rev = VT == MVT::v4i32 ? ARMISD::VREV32 : ARMISD::VREV16;
18776 SDValue Op0 = SVN->getOperand(0);
18777 SDValue Op1 = SVN->getOperand(1);
18778
18779 auto CheckInregMask = [&](int Start, int Offset) {
18780 for (int Idx = 0, E = VT.getVectorNumElements(); Idx < E; ++Idx)
18781 if (Mask[Start + Idx] >= 0 && Mask[Start + Idx] != Idx * 2 + Offset)
18782 return false;
18783 return true;
18784 };
18785 SDValue V0 = SDValue(N, 0);
18786 SDValue V1 = SDValue(N, 1);
18787 if (CheckInregMask(0, 0))
18788 V0 = Extend(Op0);
18789 else if (CheckInregMask(0, 1))
18790 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18791 else if (CheckInregMask(0, Mask.size()))
18792 V0 = Extend(Op1);
18793 else if (CheckInregMask(0, Mask.size() + 1))
18794 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18795
18796 if (CheckInregMask(VT.getVectorNumElements(), Mask.size()))
18797 V1 = Extend(Op1);
18798 else if (CheckInregMask(VT.getVectorNumElements(), Mask.size() + 1))
18799 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18800 else if (CheckInregMask(VT.getVectorNumElements(), 0))
18801 V1 = Extend(Op0);
18802 else if (CheckInregMask(VT.getVectorNumElements(), 1))
18803 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18804
18805 if (V0.getNode() != N || V1.getNode() != N)
18806 return DAG.getMergeValues({V0, V1}, DL);
18807 }
18808
18809 // MVEEXT(load) -> extload, extload
18810 if (N->getOperand(0)->getOpcode() == ISD::LOAD)
18811 if (SDValue L = PerformSplittingMVEEXTToWideningLoad(N, DAG))
18812 return L;
18813
18814 if (!DCI.isAfterLegalizeDAG())
18815 return SDValue();
18816
18817 // Lower to a stack store and reload:
18818 // VSTRW.32 a, stack; VLDRH.32 stack; VLDRH.32 stack+8;
18819 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18820 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18821 int NumOuts = N->getNumValues();
18822 assert((NumOuts == 2 || NumOuts == 4) &&
18823 "Expected 2 or 4 outputs to an MVEEXT");
18824 EVT LoadVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18825 *DAG.getContext());
18826 if (N->getNumOperands() == 4)
18827 LoadVT = LoadVT.getHalfNumVectorElementsVT(*DAG.getContext());
18828
18829 MachinePointerInfo MPI =
18830 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18831 SDValue Chain = DAG.getStore(DAG.getEntryNode(), DL, N->getOperand(0),
18832 StackPtr, MPI, Align(4));
18833
18834 SmallVector<SDValue> Loads;
18835 for (int I = 0; I < NumOuts; I++) {
18836 SDValue Ptr = DAG.getNode(
18837 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18838 DAG.getConstant(I * 16 / NumOuts, DL, StackPtr.getValueType()));
18839 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18840 DAG.getMachineFunction(), SPFI, I * 16 / NumOuts);
18841 SDValue Load = DAG.getExtLoad(
18842 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL,
18843 VT, Chain, Ptr, MPI, LoadVT, Align(4));
18844 Loads.push_back(Load);
18845 }
18846
18847 return DAG.getMergeValues(Loads, DL);
18848}
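// Editorial illustration (not part of the original source): a scalar model of
// the stack-based MVESEXT lowering above. A v8i16 input is stored once and the
// two v4i32 results are produced by sign-extending loads from offsets 0 and 8,
// mirroring "VSTRW.32 a, stack; VLDRH.32 stack; VLDRH.32 stack+8". The helper
// name is invented for this sketch.
#include <array>
#include <cstdint>
#include <cstring>

static std::array<std::array<int32_t, 4>, 2>
mveSextModel(const std::array<int16_t, 8> &A) {
  alignas(4) uint8_t Stack[16];
  std::memcpy(Stack, A.data(), 16);          // VSTRW.32 a, [stack]
  std::array<std::array<int32_t, 4>, 2> R;
  for (int Half = 0; Half < 2; ++Half)       // two widening loads
    for (int I = 0; I < 4; ++I) {
      int16_t T;
      std::memcpy(&T, Stack + 8 * Half + 2 * I, 2);
      R[Half][I] = T;                        // sign-extend each i16 lane
    }
  return R;
}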
18849
18850 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
18851 DAGCombinerInfo &DCI) const {
18852 switch (N->getOpcode()) {
18853 default: break;
18854 case ISD::SELECT_CC:
18855 case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
18856 case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
18857 case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
18858 case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
18859 case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
18860 case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
18861 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
18862 case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
18863 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
18864 case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
18865 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
18866 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
18867 case ISD::BRCOND:
18868 case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget);
18869 case ARMISD::ADDC:
18870 case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
18871 case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
18872 case ARMISD::BFI: return PerformBFICombine(N, DCI.DAG);
18873 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
18874 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
18875 case ARMISD::VMOVhr: return PerformVMOVhrCombine(N, DCI);
18876 case ARMISD::VMOVrh: return PerformVMOVrhCombine(N, DCI.DAG);
18877 case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
18878 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
18880 case ISD::EXTRACT_VECTOR_ELT:
18881 return PerformExtractEltCombine(N, DCI, Subtarget);
18885 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
18886 case ARMISD::VDUP: return PerformVDUPCombine(N, DCI.DAG, Subtarget);
18887 case ISD::FP_TO_SINT:
18888 case ISD::FP_TO_UINT:
18889 return PerformVCVTCombine(N, DCI.DAG, Subtarget);
18890 case ISD::FADD:
18891 return PerformFADDCombine(N, DCI.DAG, Subtarget);
18892 case ISD::FDIV:
18893 return PerformVDIVCombine(N, DCI.DAG, Subtarget);
18894 case ISD::INTRINSIC_WO_CHAIN:
18895 return PerformIntrinsicCombine(N, DCI);
18896 case ISD::SHL:
18897 case ISD::SRA:
18898 case ISD::SRL:
18899 return PerformShiftCombine(N, DCI, Subtarget);
18900 case ISD::SIGN_EXTEND:
18901 case ISD::ZERO_EXTEND:
18902 case ISD::ANY_EXTEND:
18903 return PerformExtendCombine(N, DCI.DAG, Subtarget);
18904 case ISD::FP_EXTEND:
18905 return PerformFPExtendCombine(N, DCI.DAG, Subtarget);
18906 case ISD::SMIN:
18907 case ISD::UMIN:
18908 case ISD::SMAX:
18909 case ISD::UMAX:
18910 return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
18911 case ARMISD::CMOV:
18912 return PerformCMOVCombine(N, DCI.DAG);
18913 case ARMISD::BRCOND:
18914 return PerformBRCONDCombine(N, DCI.DAG);
18915 case ARMISD::CMPZ:
18916 return PerformCMPZCombine(N, DCI.DAG);
18917 case ARMISD::CSINC:
18918 case ARMISD::CSINV:
18919 case ARMISD::CSNEG:
18920 return PerformCSETCombine(N, DCI.DAG);
18921 case ISD::LOAD:
18922 return PerformLOADCombine(N, DCI, Subtarget);
18923 case ARMISD::VLD1DUP:
18924 case ARMISD::VLD2DUP:
18925 case ARMISD::VLD3DUP:
18926 case ARMISD::VLD4DUP:
18927 return PerformVLDCombine(N, DCI);
18928 case ARMISD::BUILD_VECTOR:
18929 return PerformARMBUILD_VECTORCombine(N, DCI);
18930 case ISD::BITCAST:
18931 return PerformBITCASTCombine(N, DCI, Subtarget);
18932 case ARMISD::PREDICATE_CAST:
18933 return PerformPREDICATE_CASTCombine(N, DCI);
18934 case ARMISD::VECTOR_REG_CAST:
18935 return PerformVECTOR_REG_CASTCombine(N, DCI.DAG, Subtarget);
18936 case ARMISD::MVETRUNC:
18937 return PerformMVETruncCombine(N, DCI);
18938 case ARMISD::MVESEXT:
18939 case ARMISD::MVEZEXT:
18940 return PerformMVEExtCombine(N, DCI);
18941 case ARMISD::VCMP:
18942 return PerformVCMPCombine(N, DCI.DAG, Subtarget);
18943 case ISD::VECREDUCE_ADD:
18944 return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
18945 case ARMISD::VADDVs:
18946 case ARMISD::VADDVu:
18947 case ARMISD::VADDLVs:
18948 case ARMISD::VADDLVu:
18949 case ARMISD::VADDLVAs:
18950 case ARMISD::VADDLVAu:
18951 case ARMISD::VMLAVs:
18952 case ARMISD::VMLAVu:
18953 case ARMISD::VMLALVs:
18954 case ARMISD::VMLALVu:
18955 case ARMISD::VMLALVAs:
18956 case ARMISD::VMLALVAu:
18957 return PerformReduceShuffleCombine(N, DCI.DAG);
18958 case ARMISD::VMOVN:
18959 return PerformVMOVNCombine(N, DCI);
18960 case ARMISD::VQMOVNs:
18961 case ARMISD::VQMOVNu:
18962 return PerformVQMOVNCombine(N, DCI);
18963 case ARMISD::VQDMULH:
18964 return PerformVQDMULHCombine(N, DCI);
18965 case ARMISD::ASRL:
18966 case ARMISD::LSRL:
18967 case ARMISD::LSLL:
18968 return PerformLongShiftCombine(N, DCI.DAG);
18969 case ARMISD::SMULWB: {
18970 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18971 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
18972 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
18973 return SDValue();
18974 break;
18975 }
18976 case ARMISD::SMULWT: {
18977 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18978 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
18979 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
18980 return SDValue();
18981 break;
18982 }
18983 case ARMISD::SMLALBB:
18984 case ARMISD::QADD16b:
18985 case ARMISD::QSUB16b:
18986 case ARMISD::UQADD16b:
18987 case ARMISD::UQSUB16b: {
18988 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18989 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
18990 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
18991 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
18992 return SDValue();
18993 break;
18994 }
18995 case ARMISD::SMLALBT: {
18996 unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
18997 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
18998 unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
18999 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19000 if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
19001 (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
19002 return SDValue();
19003 break;
19004 }
19005 case ARMISD::SMLALTB: {
19006 unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
19007 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19008 unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
19009 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
19010 if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
19011 (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
19012 return SDValue();
19013 break;
19014 }
19015 case ARMISD::SMLALTT: {
19016 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19017 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
19018 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19019 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19020 return SDValue();
19021 break;
19022 }
19023 case ARMISD::QADD8b:
19024 case ARMISD::QSUB8b:
19025 case ARMISD::UQADD8b:
19026 case ARMISD::UQSUB8b: {
19027 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19028 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
19029 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19030 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19031 return SDValue();
19032 break;
19033 }
19034 case ISD::INTRINSIC_VOID:
19035 case ISD::INTRINSIC_W_CHAIN:
19036 switch (N->getConstantOperandVal(1)) {
19037 case Intrinsic::arm_neon_vld1:
19038 case Intrinsic::arm_neon_vld1x2:
19039 case Intrinsic::arm_neon_vld1x3:
19040 case Intrinsic::arm_neon_vld1x4:
19041 case Intrinsic::arm_neon_vld2:
19042 case Intrinsic::arm_neon_vld3:
19043 case Intrinsic::arm_neon_vld4:
19044 case Intrinsic::arm_neon_vld2lane:
19045 case Intrinsic::arm_neon_vld3lane:
19046 case Intrinsic::arm_neon_vld4lane:
19047 case Intrinsic::arm_neon_vld2dup:
19048 case Intrinsic::arm_neon_vld3dup:
19049 case Intrinsic::arm_neon_vld4dup:
19050 case Intrinsic::arm_neon_vst1:
19051 case Intrinsic::arm_neon_vst1x2:
19052 case Intrinsic::arm_neon_vst1x3:
19053 case Intrinsic::arm_neon_vst1x4:
19054 case Intrinsic::arm_neon_vst2:
19055 case Intrinsic::arm_neon_vst3:
19056 case Intrinsic::arm_neon_vst4:
19057 case Intrinsic::arm_neon_vst2lane:
19058 case Intrinsic::arm_neon_vst3lane:
19059 case Intrinsic::arm_neon_vst4lane:
19060 return PerformVLDCombine(N, DCI);
19061 case Intrinsic::arm_mve_vld2q:
19062 case Intrinsic::arm_mve_vld4q:
19063 case Intrinsic::arm_mve_vst2q:
19064 case Intrinsic::arm_mve_vst4q:
19065 return PerformMVEVLDCombine(N, DCI);
19066 default: break;
19067 }
19068 break;
19069 }
19070 return SDValue();
19071}
19072
19073 bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
19074 EVT VT) const {
19075 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
19076}
19077
19078 bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
19079 Align Alignment,
19080 MachineMemOperand::Flags,
19081 unsigned *Fast) const {
19082 // Depends what it gets converted into if the type is weird.
19083 if (!VT.isSimple())
19084 return false;
19085
19086 // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
19087 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
19088 auto Ty = VT.getSimpleVT().SimpleTy;
19089
19090 if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
19091 // Unaligned access can use (for example) LDRB, LDRH, LDR
19092 if (AllowsUnaligned) {
19093 if (Fast)
19094 *Fast = Subtarget->hasV7Ops();
19095 return true;
19096 }
19097 }
19098
19099 if (Ty == MVT::f64 || Ty == MVT::v2f64) {
19100 // For any little-endian targets with neon, we can support unaligned ld/st
19101 // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
19102 // A big-endian target may also explicitly support unaligned accesses
19103 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
19104 if (Fast)
19105 *Fast = 1;
19106 return true;
19107 }
19108 }
19109
19110 if (!Subtarget->hasMVEIntegerOps())
19111 return false;
19112
19113 // These are for predicates
19114 if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
19115 Ty == MVT::v2i1)) {
19116 if (Fast)
19117 *Fast = 1;
19118 return true;
19119 }
19120
19121 // These are for truncated stores/narrowing loads. They are fine so long as
19122 // the alignment is at least the size of the item being loaded
19123 if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
19124 Alignment >= VT.getScalarSizeInBits() / 8) {
19125 if (Fast)
19126 *Fast = true;
19127 return true;
19128 }
19129
19130 // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
19131 // VSTRW.U32 all store the vector register in exactly the same format, and
19132 // differ only in the range of their immediate offset field and the required
19133 // alignment. So there is always a store that can be used, regardless of
19134 // actual type.
19135 //
19136 // For big endian, that is not the case. But we can still emit a (VSTRB.U8;
19137 // VREV64.8) pair and get the same effect. This will likely be better than
19138 // aligning the vector through the stack.
19139 if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
19140 Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
19141 Ty == MVT::v2f64) {
19142 if (Fast)
19143 *Fast = 1;
19144 return true;
19145 }
19146
19147 return false;
19148}
19149
19150
19151 EVT ARMTargetLowering::getOptimalMemOpType(
19152 const MemOp &Op, const AttributeList &FuncAttributes) const {
19153 // See if we can use NEON instructions for this...
19154 if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
19155 !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
19156 unsigned Fast;
19157 if (Op.size() >= 16 &&
19158 (Op.isAligned(Align(16)) ||
19159 (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),
19160 MachineMemOperand::MONone, &Fast) &&
19161 Fast))) {
19162 return MVT::v2f64;
19163 } else if (Op.size() >= 8 &&
19164 (Op.isAligned(Align(8)) ||
19165 (allowsMisalignedMemoryAccesses(
19166 MVT::f64, 0, Align(1), MachineMemOperand::MONone, &Fast) &&
19167 Fast))) {
19168 return MVT::f64;
19169 }
19170 }
19171
19172 // Let the target-independent logic figure it out.
19173 return MVT::Other;
19174}
19175
19176// 64-bit integers are split into their high and low parts and held in two
19177// different registers, so the trunc is free since the low register can just
19178// be used.
19179bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
19180 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
19181 return false;
19182 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
19183 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
19184 return (SrcBits == 64 && DestBits == 32);
19185}
19186
19187 bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
19188 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
19189 !DstVT.isInteger())
19190 return false;
19191 unsigned SrcBits = SrcVT.getSizeInBits();
19192 unsigned DestBits = DstVT.getSizeInBits();
19193 return (SrcBits == 64 && DestBits == 32);
19194}
19195
19196 bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
19197 if (Val.getOpcode() != ISD::LOAD)
19198 return false;
19199
19200 EVT VT1 = Val.getValueType();
19201 if (!VT1.isSimple() || !VT1.isInteger() ||
19202 !VT2.isSimple() || !VT2.isInteger())
19203 return false;
19204
19205 switch (VT1.getSimpleVT().SimpleTy) {
19206 default: break;
19207 case MVT::i1:
19208 case MVT::i8:
19209 case MVT::i16:
19210 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
19211 return true;
19212 }
19213
19214 return false;
19215}
19216
19217 bool ARMTargetLowering::isFNegFree(EVT VT) const {
19218 if (!VT.isSimple())
19219 return false;
19220
19221 // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
19222 // negate values directly (fneg is free). So, we don't want to let the DAG
19223 // combiner rewrite fneg into xors and some other instructions. For f16 and
19224 // FullFP16 argument passing, some bitcast nodes may be introduced,
19225 // triggering this DAG combine rewrite, so we are avoiding that with this.
19226 switch (VT.getSimpleVT().SimpleTy) {
19227 default: break;
19228 case MVT::f16:
19229 return Subtarget->hasFullFP16();
19230 }
19231
19232 return false;
19233}
19234
19235/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
19236/// of the vector elements.
19237static bool areExtractExts(Value *Ext1, Value *Ext2) {
19238 auto areExtDoubled = [](Instruction *Ext) {
19239 return Ext->getType()->getScalarSizeInBits() ==
19240 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
19241 };
19242
19243 if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
19244 !match(Ext2, m_ZExtOrSExt(m_Value())) ||
19245 !areExtDoubled(cast<Instruction>(Ext1)) ||
19246 !areExtDoubled(cast<Instruction>(Ext2)))
19247 return false;
19248
19249 return true;
19250}
19251
19252/// Check if sinking \p I's operands to I's basic block is profitable, because
19253/// the operands can be folded into a target instruction, e.g.
19254/// sext/zext can be folded into vsubl.
19255 bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
19256 SmallVectorImpl<Use *> &Ops) const {
19257 if (!I->getType()->isVectorTy())
19258 return false;
19259
19260 if (Subtarget->hasNEON()) {
19261 switch (I->getOpcode()) {
19262 case Instruction::Sub:
19263 case Instruction::Add: {
19264 if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
19265 return false;
19266 Ops.push_back(&I->getOperandUse(0));
19267 Ops.push_back(&I->getOperandUse(1));
19268 return true;
19269 }
19270 default:
19271 return false;
19272 }
19273 }
19274
19275 if (!Subtarget->hasMVEIntegerOps())
19276 return false;
19277
19278 auto IsFMSMul = [&](Instruction *I) {
19279 if (!I->hasOneUse())
19280 return false;
19281 auto *Sub = cast<Instruction>(*I->users().begin());
19282 return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
19283 };
19284 auto IsFMS = [&](Instruction *I) {
19285 if (match(I->getOperand(0), m_FNeg(m_Value())) ||
19286 match(I->getOperand(1), m_FNeg(m_Value())))
19287 return true;
19288 return false;
19289 };
19290
19291 auto IsSinker = [&](Instruction *I, int Operand) {
19292 switch (I->getOpcode()) {
19293 case Instruction::Add:
19294 case Instruction::Mul:
19295 case Instruction::FAdd:
19296 case Instruction::ICmp:
19297 case Instruction::FCmp:
19298 return true;
19299 case Instruction::FMul:
19300 return !IsFMSMul(I);
19301 case Instruction::Sub:
19302 case Instruction::FSub:
19303 case Instruction::Shl:
19304 case Instruction::LShr:
19305 case Instruction::AShr:
19306 return Operand == 1;
19307 case Instruction::Call:
19308 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
19309 switch (II->getIntrinsicID()) {
19310 case Intrinsic::fma:
19311 return !IsFMS(I);
19312 case Intrinsic::sadd_sat:
19313 case Intrinsic::uadd_sat:
19314 case Intrinsic::arm_mve_add_predicated:
19315 case Intrinsic::arm_mve_mul_predicated:
19316 case Intrinsic::arm_mve_qadd_predicated:
19317 case Intrinsic::arm_mve_vhadd:
19318 case Intrinsic::arm_mve_hadd_predicated:
19319 case Intrinsic::arm_mve_vqdmull:
19320 case Intrinsic::arm_mve_vqdmull_predicated:
19321 case Intrinsic::arm_mve_vqdmulh:
19322 case Intrinsic::arm_mve_qdmulh_predicated:
19323 case Intrinsic::arm_mve_vqrdmulh:
19324 case Intrinsic::arm_mve_qrdmulh_predicated:
19325 case Intrinsic::arm_mve_fma_predicated:
19326 return true;
19327 case Intrinsic::ssub_sat:
19328 case Intrinsic::usub_sat:
19329 case Intrinsic::arm_mve_sub_predicated:
19330 case Intrinsic::arm_mve_qsub_predicated:
19331 case Intrinsic::arm_mve_hsub_predicated:
19332 case Intrinsic::arm_mve_vhsub:
19333 return Operand == 1;
19334 default:
19335 return false;
19336 }
19337 }
19338 return false;
19339 default:
19340 return false;
19341 }
19342 };
19343
19344 for (auto OpIdx : enumerate(I->operands())) {
19345 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
19346 // Make sure we are not already sinking this operand
19347 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
19348 continue;
19349
19350 Instruction *Shuffle = Op;
19351 if (Shuffle->getOpcode() == Instruction::BitCast)
19352 Shuffle = dyn_cast<Instruction>(Shuffle->getOperand(0));
19353 // We are looking for a splat that can be sunk.
19354 if (!Shuffle ||
19355 !match(Shuffle, m_Shuffle(
19356 m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
19357 m_Undef(), m_ZeroMask())))
19358 continue;
19359 if (!IsSinker(I, OpIdx.index()))
19360 continue;
19361
19362 // All uses of the shuffle should be sunk to avoid duplicating it across gpr
19363 // and vector registers
19364 for (Use &U : Op->uses()) {
19365 Instruction *Insn = cast<Instruction>(U.getUser());
19366 if (!IsSinker(Insn, U.getOperandNo()))
19367 return false;
19368 }
19369
19370 Ops.push_back(&Shuffle->getOperandUse(0));
19371 if (Shuffle != Op)
19372 Ops.push_back(&Op->getOperandUse(0));
19373 Ops.push_back(&OpIdx.value());
19374 }
19375 return true;
19376}
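// For example (a sketch, assuming MVE is enabled): given IR such as
//   %ins   = insertelement <4 x i32> undef, i32 %s, i64 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
//   %sum   = add <4 x i32> %v, %splat
// sinking %ins and %splat next to the add lets instruction selection use the
// vector-plus-scalar form (e.g. vadd.i32 q0, q0, r0) instead of materializing
// the splat in a vector register in a different block.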
19377
19378 Type *ARMTargetLowering::shouldConvertSplatType(ShuffleVectorInst *SVI) const {
19379 if (!Subtarget->hasMVEIntegerOps())
19380 return nullptr;
19381 Type *SVIType = SVI->getType();
19382 Type *ScalarType = SVIType->getScalarType();
19383
19384 if (ScalarType->isFloatTy())
19385 return Type::getInt32Ty(SVIType->getContext());
19386 if (ScalarType->isHalfTy())
19387 return Type::getInt16Ty(SVIType->getContext());
19388 return nullptr;
19389}
19390
19391 bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
19392 EVT VT = ExtVal.getValueType();
19393
19394 if (!isTypeLegal(VT))
19395 return false;
19396
19397 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
19398 if (Ld->isExpandingLoad())
19399 return false;
19400 }
19401
19402 if (Subtarget->hasMVEIntegerOps())
19403 return true;
19404
19405 // Don't create a loadext if we can fold the extension into a wide/long
19406 // instruction.
19407 // If there's more than one user instruction, the loadext is desirable no
19408 // matter what. There can be two uses by the same instruction.
19409 if (ExtVal->use_empty() ||
19410 !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
19411 return true;
19412
19413 SDNode *U = *ExtVal->use_begin();
19414 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
19415 U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
19416 return false;
19417
19418 return true;
19419}
19420
19421 bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
19422 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19423 return false;
19424
19425 if (!isTypeLegal(EVT::getEVT(Ty1)))
19426 return false;
19427
19428 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
19429
19430 // Assuming the caller doesn't have a zeroext or signext return parameter,
19431 // truncation all the way down to i1 is valid.
19432 return true;
19433}
19434
19435/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
19436/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
19437/// expanded to FMAs when this method returns true, otherwise fmuladd is
19438/// expanded to fmul + fadd.
19439///
19440/// ARM supports both fused and unfused multiply-add operations; we already
19441/// lower a pair of fmul and fadd to the latter so it's not clear that there
19442/// would be a gain or that the gain would be worthwhile enough to risk
19443/// correctness bugs.
19444///
19445/// For MVE, we set this to true as it helps simplify the need for some
19446/// patterns (and we don't have the non-fused floating point instruction).
19447bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19448 EVT VT) const {
19449 if (!VT.isSimple())
19450 return false;
19451
19452 switch (VT.getSimpleVT().SimpleTy) {
19453 case MVT::v4f32:
19454 case MVT::v8f16:
19455 return Subtarget->hasMVEFloatOps();
19456 case MVT::f16:
19457 return Subtarget->useFPVFMx16();
19458 case MVT::f32:
19459 return Subtarget->useFPVFMx();
19460 case MVT::f64:
19461 return Subtarget->useFPVFMx64();
19462 default:
19463 break;
19464 }
19465
19466 return false;
19467}
19468
19469static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
19470 if (V < 0)
19471 return false;
19472
19473 unsigned Scale = 1;
19474 switch (VT.getSimpleVT().SimpleTy) {
19475 case MVT::i1:
19476 case MVT::i8:
19477 // Scale == 1;
19478 break;
19479 case MVT::i16:
19480 // Scale == 2;
19481 Scale = 2;
19482 break;
19483 default:
19484 // On thumb1 we load most things (i32, i64, floats, etc) with a LDR
19485 // Scale == 4;
19486 Scale = 4;
19487 break;
19488 }
19489
19490 if ((V & (Scale - 1)) != 0)
19491 return false;
19492 return isUInt<5>(V / Scale);
19493}
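// For illustration (derived from the checks above): Thumb1 offsets are an
// unsigned 5-bit field scaled by the access size, e.g.
//   ldrb r0, [r1, #31]   ; i8:  0..31
//   ldrh r0, [r1, #62]   ; i16: 0..62, multiple of 2
//   ldr  r0, [r1, #124]  ; i32: 0..124, multiple of 4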
19494
19495static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
19496 const ARMSubtarget *Subtarget) {
19497 if (!VT.isInteger() && !VT.isFloatingPoint())
19498 return false;
19499 if (VT.isVector() && Subtarget->hasNEON())
19500 return false;
19501 if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
19502 !Subtarget->hasMVEFloatOps())
19503 return false;
19504
19505 bool IsNeg = false;
19506 if (V < 0) {
19507 IsNeg = true;
19508 V = -V;
19509 }
19510
19511 unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);
19512
19513 // MVE: size * imm7
19514 if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
19515 switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
19516 case MVT::i32:
19517 case MVT::f32:
19518 return isShiftedUInt<7,2>(V);
19519 case MVT::i16:
19520 case MVT::f16:
19521 return isShiftedUInt<7,1>(V);
19522 case MVT::i8:
19523 return isUInt<7>(V);
19524 default:
19525 return false;
19526 }
19527 }
19528
19529 // half VLDR: 2 * imm8
19530 if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
19531 return isShiftedUInt<8, 1>(V);
19532 // VLDR and LDRD: 4 * imm8
19533 if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
19534 return isShiftedUInt<8, 2>(V);
19535
19536 if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
19537 // + imm12 or - imm8
19538 if (IsNeg)
19539 return isUInt<8>(V);
19540 return isUInt<12>(V);
19541 }
19542
19543 return false;
19544}
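// Worked example (sketch): for an MVE v4i32 access the offset must be a
// multiple of 4 that fits a 7-bit field after scaling, i.e. within [-508, 508]:
//   vldrw.u32 q0, [r0, #508]   ; legal
//   vldrw.u32 q0, [r0, #510]   ; not a multiple of 4 -> rejected
//   vldrw.u32 q0, [r0, #512]   ; out of range        -> rejected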
19545
19546/// isLegalAddressImmediate - Return true if the integer value can be used
19547/// as the offset of the target addressing mode for load / store of the
19548/// given type.
19549static bool isLegalAddressImmediate(int64_t V, EVT VT,
19550 const ARMSubtarget *Subtarget) {
19551 if (V == 0)
19552 return true;
19553
19554 if (!VT.isSimple())
19555 return false;
19556
19557 if (Subtarget->isThumb1Only())
19558 return isLegalT1AddressImmediate(V, VT);
19559 else if (Subtarget->isThumb2())
19560 return isLegalT2AddressImmediate(V, VT, Subtarget);
19561
19562 // ARM mode.
19563 if (V < 0)
19564 V = - V;
19565 switch (VT.getSimpleVT().SimpleTy) {
19566 default: return false;
19567 case MVT::i1:
19568 case MVT::i8:
19569 case MVT::i32:
19570 // +- imm12
19571 return isUInt<12>(V);
19572 case MVT::i16:
19573 // +- imm8
19574 return isUInt<8>(V);
19575 case MVT::f32:
19576 case MVT::f64:
19577 if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
19578 return false;
19579 return isShiftedUInt<8, 2>(V);
19580 }
19581}
19582
19583 bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
19584 EVT VT) const {
19585 int Scale = AM.Scale;
19586 if (Scale < 0)
19587 return false;
19588
19589 switch (VT.getSimpleVT().SimpleTy) {
19590 default: return false;
19591 case MVT::i1:
19592 case MVT::i8:
19593 case MVT::i16:
19594 case MVT::i32:
19595 if (Scale == 1)
19596 return true;
19597 // r + r << imm
19598 Scale = Scale & ~1;
19599 return Scale == 2 || Scale == 4 || Scale == 8;
19600 case MVT::i64:
19601 // FIXME: What are we trying to model here? ldrd doesn't have an r + r
19602 // version in Thumb mode.
19603 // r + r
19604 if (Scale == 1)
19605 return true;
19606 // r * 2 (this can be lowered to r + r).
19607 if (!AM.HasBaseReg && Scale == 2)
19608 return true;
19609 return false;
19610 case MVT::isVoid:
19611 // Note, we allow "void" uses (basically, uses that aren't loads or
19612 // stores), because arm allows folding a scale into many arithmetic
19613 // operations. This should be made more precise and revisited later.
19614
19615 // Allow r << imm, but the imm has to be a multiple of two.
19616 if (Scale & 1) return false;
19617 return isPowerOf2_32(Scale);
19618 }
19619}
19620
19621 bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM,
19622 EVT VT) const {
19623 const int Scale = AM.Scale;
19624
19625 // Negative scales are not supported in Thumb1.
19626 if (Scale < 0)
19627 return false;
19628
19629 // Thumb1 addressing modes do not support register scaling excepting the
19630 // following cases:
19631 // 1. Scale == 1 means no scaling.
19632 // 2. Scale == 2 this can be lowered to r + r if there is no base register.
19633 return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
19634}
19635
19636/// isLegalAddressingMode - Return true if the addressing mode represented
19637/// by AM is legal for this target, for a load/store of the specified type.
19638 bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
19639 const AddrMode &AM, Type *Ty,
19640 unsigned AS, Instruction *I) const {
19641 EVT VT = getValueType(DL, Ty, true);
19642 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
19643 return false;
19644
19645 // Can never fold addr of global into load/store.
19646 if (AM.BaseGV)
19647 return false;
19648
19649 switch (AM.Scale) {
19650 case 0: // no scale reg, must be "r+i" or "r", or "i".
19651 break;
19652 default:
19653 // ARM doesn't support any R+R*scale+imm addr modes.
19654 if (AM.BaseOffs)
19655 return false;
19656
19657 if (!VT.isSimple())
19658 return false;
19659
19660 if (Subtarget->isThumb1Only())
19661 return isLegalT1ScaledAddressingMode(AM, VT);
19662
19663 if (Subtarget->isThumb2())
19664 return isLegalT2ScaledAddressingMode(AM, VT);
19665
19666 int Scale = AM.Scale;
19667 switch (VT.getSimpleVT().SimpleTy) {
19668 default: return false;
19669 case MVT::i1:
19670 case MVT::i8:
19671 case MVT::i32:
19672 if (Scale < 0) Scale = -Scale;
19673 if (Scale == 1)
19674 return true;
19675 // r + r << imm
19676 return isPowerOf2_32(Scale & ~1);
19677 case MVT::i16:
19678 case MVT::i64:
19679 // r +/- r
19680 if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
19681 return true;
19682 // r * 2 (this can be lowered to r + r).
19683 if (!AM.HasBaseReg && Scale == 2)
19684 return true;
19685 return false;
19686
19687 case MVT::isVoid:
19688 // Note, we allow "void" uses (basically, uses that aren't loads or
19689 // stores), because arm allows folding a scale into many arithmetic
19690 // operations. This should be made more precise and revisited later.
19691
19692 // Allow r << imm, but the imm has to be a multiple of two.
19693 if (Scale & 1) return false;
19694 return isPowerOf2_32(Scale);
19695 }
19696 }
19697 return true;
19698}
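// Example (sketch): for an i32 load in ARM mode the register-scaled form
//   ldr r0, [r1, r2, lsl #2]
// is accepted (reg + reg << imm with no immediate), while a scaled index
// combined with a nonzero BaseOffs is rejected above, since no ARM addressing
// mode takes both a scaled register and an immediate offset.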
19699
19700/// isLegalICmpImmediate - Return true if the specified immediate is legal
19701/// icmp immediate, that is the target has icmp instructions which can compare
19702/// a register against the immediate without having to materialize the
19703/// immediate into a register.
19704 bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
19705 // Thumb2 and ARM modes can use cmn for negative immediates.
19706 if (!Subtarget->isThumb())
19707 return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
19708 ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
19709 if (Subtarget->isThumb2())
19710 return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
19711 ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
19712 // Thumb1 doesn't have cmn, and only 8-bit immediates.
19713 return Imm >= 0 && Imm <= 255;
19714}
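// Example (sketch): in ARM/Thumb2 mode "icmp eq i32 %x, -10" can be selected
// as "cmn r0, #10", so -10 is reported as legal; in Thumb1 only 0..255 are
// accepted and -10 would first have to be materialized into a register.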
19715
19716/// isLegalAddImmediate - Return true if the specified immediate is a legal add
19717/// *or sub* immediate, that is the target has add or sub instructions which can
19718/// add a register with the immediate without having to materialize the
19719/// immediate into a register.
19720 bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
19721 // Same encoding for add/sub, just flip the sign.
19722 int64_t AbsImm = std::abs(Imm);
19723 if (!Subtarget->isThumb())
19724 return ARM_AM::getSOImmVal(AbsImm) != -1;
19725 if (Subtarget->isThumb2())
19726 return ARM_AM::getT2SOImmVal(AbsImm) != -1;
19727 // Thumb1 only has 8-bit unsigned immediate.
19728 return AbsImm >= 0 && AbsImm <= 255;
19729}
19730
19731// Return false to prevent folding
19732// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
19733// if the folding leads to worse code.
19734 bool ARMTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
19735 SDValue ConstNode) const {
19736 // Let the DAGCombiner decide for vector types and large types.
19737 const EVT VT = AddNode.getValueType();
19738 if (VT.isVector() || VT.getScalarSizeInBits() > 32)
19739 return true;
19740
19741 // It is worse if c0 is legal add immediate, while c1*c0 is not
19742 // and has to be composed by at least two instructions.
19743 const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1));
19744 const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode);
19745 const int64_t C0 = C0Node->getSExtValue();
19746 APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
19747 if (!isLegalAddImmediate(C0) || isLegalAddImmediate(CA.getSExtValue()))
19748 return true;
19749 if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1)
19750 return false;
19751
19752 // Default to true and let the DAGCombiner decide.
19753 return true;
19754}
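// Worked example (sketch, Thumb2): for (x + 1) * 65537 the add immediate 1 is
// legal, but 1 * 65537 = 0x10001 is not, and materializing 0x10001 costs a
// movw+movt pair, so this returns false and DAGCombine keeps the cheaper
// (mul (add x, 1), 65537) form instead of (add (mul x, 65537), 65537).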
19755
19756 static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
19757 bool isSEXTLoad, SDValue &Base,
19758 SDValue &Offset, bool &isInc,
19759 SelectionDAG &DAG) {
19760 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19761 return false;
19762
19763 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
19764 // AddressingMode 3
19765 Base = Ptr->getOperand(0);
19766 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19767 int RHSC = (int)RHS->getZExtValue();
19768 if (RHSC < 0 && RHSC > -256) {
19769 assert(Ptr->getOpcode() == ISD::ADD);
19770 isInc = false;
19771 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19772 return true;
19773 }
19774 }
19775 isInc = (Ptr->getOpcode() == ISD::ADD);
19776 Offset = Ptr->getOperand(1);
19777 return true;
19778 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
19779 // AddressingMode 2
19780 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19781 int RHSC = (int)RHS->getZExtValue();
19782 if (RHSC < 0 && RHSC > -0x1000) {
19783 assert(Ptr->getOpcode() == ISD::ADD);
19784 isInc = false;
19785 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19786 Base = Ptr->getOperand(0);
19787 return true;
19788 }
19789 }
19790
19791 if (Ptr->getOpcode() == ISD::ADD) {
19792 isInc = true;
19793 ARM_AM::ShiftOpc ShOpcVal=
19794 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
19795 if (ShOpcVal != ARM_AM::no_shift) {
19796 Base = Ptr->getOperand(1);
19797 Offset = Ptr->getOperand(0);
19798 } else {
19799 Base = Ptr->getOperand(0);
19800 Offset = Ptr->getOperand(1);
19801 }
19802 return true;
19803 }
19804
19805 isInc = (Ptr->getOpcode() == ISD::ADD);
19806 Base = Ptr->getOperand(0);
19807 Offset = Ptr->getOperand(1);
19808 return true;
19809 }
19810
19811 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
19812 return false;
19813}
19814
19815 static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
19816 bool isSEXTLoad, SDValue &Base,
19817 SDValue &Offset, bool &isInc,
19818 SelectionDAG &DAG) {
19819 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19820 return false;
19821
19822 Base = Ptr->getOperand(0);
19823 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19824 int RHSC = (int)RHS->getZExtValue();
19825 if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
19826 assert(Ptr->getOpcode() == ISD::ADD);
19827 isInc = false;
19828 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19829 return true;
19830 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
19831 isInc = Ptr->getOpcode() == ISD::ADD;
19832 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19833 return true;
19834 }
19835 }
19836
19837 return false;
19838}
19839
19840static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment,
19841 bool isSEXTLoad, bool IsMasked, bool isLE,
19842 SDValue &Base, SDValue &Offset,
19843 bool &isInc, SelectionDAG &DAG) {
19844 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19845 return false;
19846 if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
19847 return false;
19848
19849 // We allow LE non-masked loads to change the type (for example use a vldrb.8
19850 // as opposed to a vldrw.32). This can allow extra addressing modes or
19851 // alignments for what is otherwise an equivalent instruction.
19852 bool CanChangeType = isLE && !IsMasked;
19853
19854 ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
19855 int RHSC = (int)RHS->getZExtValue();
19856
19857 auto IsInRange = [&](int RHSC, int Limit, int Scale) {
19858 if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
19859 assert(Ptr->getOpcode() == ISD::ADD);
19860 isInc = false;
19861 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19862 return true;
19863 } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
19864 isInc = Ptr->getOpcode() == ISD::ADD;
19865 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19866 return true;
19867 }
19868 return false;
19869 };
19870
19871 // Try to find a matching instruction based on s/zext, Alignment, Offset and
19872 // (in BE/masked) type.
19873 Base = Ptr->getOperand(0);
19874 if (VT == MVT::v4i16) {
19875 if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
19876 return true;
19877 } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
19878 if (IsInRange(RHSC, 0x80, 1))
19879 return true;
19880 } else if (Alignment >= 4 &&
19881 (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
19882 IsInRange(RHSC, 0x80, 4))
19883 return true;
19884 else if (Alignment >= 2 &&
19885 (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
19886 IsInRange(RHSC, 0x80, 2))
19887 return true;
19888 else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
19889 return true;
19890 return false;
19891}
19892
19893/// getPreIndexedAddressParts - returns true by value, base pointer and
19894/// offset pointer and addressing mode by reference if the node's address
19895/// can be legally represented as pre-indexed load / store address.
19896 bool
19897 ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
19898 SDValue &Offset,
19899 ISD::MemIndexedMode &AM,
19900 SelectionDAG &DAG) const {
19901 if (Subtarget->isThumb1Only())
19902 return false;
19903
19904 EVT VT;
19905 SDValue Ptr;
19906 Align Alignment;
19907 bool isSEXTLoad = false;
19908 bool IsMasked = false;
19909 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19910 Ptr = LD->getBasePtr();
19911 VT = LD->getMemoryVT();
19912 Alignment = LD->getAlign();
19913 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19914 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19915 Ptr = ST->getBasePtr();
19916 VT = ST->getMemoryVT();
19917 Alignment = ST->getAlign();
19918 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19919 Ptr = LD->getBasePtr();
19920 VT = LD->getMemoryVT();
19921 Alignment = LD->getAlign();
19922 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19923 IsMasked = true;
19924 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19925 Ptr = ST->getBasePtr();
19926 VT = ST->getMemoryVT();
19927 Alignment = ST->getAlign();
19928 IsMasked = true;
19929 } else
19930 return false;
19931
19932 bool isInc;
19933 bool isLegal = false;
19934 if (VT.isVector())
19935 isLegal = Subtarget->hasMVEIntegerOps() &&
19936 getMVEIndexedAddressParts(
19937 Ptr.getNode(), VT, Alignment, isSEXTLoad, IsMasked,
19938 Subtarget->isLittle(), Base, Offset, isInc, DAG);
19939 else {
19940 if (Subtarget->isThumb2())
19941 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19942 Offset, isInc, DAG);
19943 else
19944 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19945 Offset, isInc, DAG);
19946 }
19947 if (!isLegal)
19948 return false;
19949
19950 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
19951 return true;
19952}
19953
19954/// getPostIndexedAddressParts - returns true by value, base pointer and
19955/// offset pointer and addressing mode by reference if this node can be
19956/// combined with a load / store to form a post-indexed load / store.
19957 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
19958 SDValue &Base,
19959 SDValue &Offset,
19960 ISD::MemIndexedMode &AM,
19961 SelectionDAG &DAG) const {
19962 EVT VT;
19963 SDValue Ptr;
19964 Align Alignment;
19965 bool isSEXTLoad = false, isNonExt;
19966 bool IsMasked = false;
19967 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19968 VT = LD->getMemoryVT();
19969 Ptr = LD->getBasePtr();
19970 Alignment = LD->getAlign();
19971 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19972 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19973 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19974 VT = ST->getMemoryVT();
19975 Ptr = ST->getBasePtr();
19976 Alignment = ST->getAlign();
19977 isNonExt = !ST->isTruncatingStore();
19978 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19979 VT = LD->getMemoryVT();
19980 Ptr = LD->getBasePtr();
19981 Alignment = LD->getAlign();
19982 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19983 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19984 IsMasked = true;
19985 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19986 VT = ST->getMemoryVT();
19987 Ptr = ST->getBasePtr();
19988 Alignment = ST->getAlign();
19989 isNonExt = !ST->isTruncatingStore();
19990 IsMasked = true;
19991 } else
19992 return false;
19993
19994 if (Subtarget->isThumb1Only()) {
19995 // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
19996 // must be non-extending/truncating, i32, with an offset of 4.
19997 assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
19998 if (Op->getOpcode() != ISD::ADD || !isNonExt)
19999 return false;
20000 auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
20001 if (!RHS || RHS->getZExtValue() != 4)
20002 return false;
20003 if (Alignment < Align(4))
20004 return false;
20005
20006 Offset = Op->getOperand(1);
20007 Base = Op->getOperand(0);
20008 AM = ISD::POST_INC;
20009 return true;
20010 }
20011
20012 bool isInc;
20013 bool isLegal = false;
20014 if (VT.isVector())
20015 isLegal = Subtarget->hasMVEIntegerOps() &&
20016 getMVEIndexedAddressParts(Op, VT, Alignment, isSEXTLoad, IsMasked,
20017 Subtarget->isLittle(), Base, Offset,
20018 isInc, DAG);
20019 else {
20020 if (Subtarget->isThumb2())
20021 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
20022 isInc, DAG);
20023 else
20024 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
20025 isInc, DAG);
20026 }
20027 if (!isLegal)
20028 return false;
20029
20030 if (Ptr != Base) {
20031 // Swap base ptr and offset to catch more post-index load / store when
20032 // it's legal. In Thumb2 mode, offset must be an immediate.
20033 if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
20034 !Subtarget->isThumb2())
20035 std::swap(Base, Offset);
20036
20037 // Post-indexed load / store update the base pointer.
20038 if (Ptr != Base)
20039 return false;
20040 }
20041
20042 AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
20043 return true;
20044}
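// Example (sketch): a post-indexed i32 load such as
//   ldr r0, [r1], #4      ; load *r1, then r1 += 4
// is formed here when the pointer update matches one of the ranges above; on
// Thumb1 only the non-extending i32, offset-4 case is accepted, which maps
// onto an updating LDM.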
20045
20046 void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
20047 KnownBits &Known,
20048 const APInt &DemandedElts,
20049 const SelectionDAG &DAG,
20050 unsigned Depth) const {
20051 unsigned BitWidth = Known.getBitWidth();
20052 Known.resetAll();
20053 switch (Op.getOpcode()) {
20054 default: break;
20055 case ARMISD::ADDC:
20056 case ARMISD::ADDE:
20057 case ARMISD::SUBC:
20058 case ARMISD::SUBE:
20059 // Special cases when we convert a carry to a boolean.
20060 if (Op.getResNo() == 0) {
20061 SDValue LHS = Op.getOperand(0);
20062 SDValue RHS = Op.getOperand(1);
20063 // (ADDE 0, 0, C) will give us a single bit.
20064 if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
20065 isNullConstant(RHS)) {
20066 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
20067 return;
20068 }
20069 }
20070 break;
20071 case ARMISD::CMOV: {
20072 // Bits are known zero/one if known on the LHS and RHS.
20073 Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
20074 if (Known.isUnknown())
20075 return;
20076
20077 KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
20078 Known = Known.intersectWith(KnownRHS);
20079 return;
20080 }
20081 case ISD::INTRINSIC_W_CHAIN: {
20082 Intrinsic::ID IntID =
20083 static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
20084 switch (IntID) {
20085 default: return;
20086 case Intrinsic::arm_ldaex:
20087 case Intrinsic::arm_ldrex: {
20088 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
20089 unsigned MemBits = VT.getScalarSizeInBits();
20090 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
20091 return;
20092 }
20093 }
20094 }
20095 case ARMISD::BFI: {
20096 // Conservatively, we can recurse down the first operand
20097 // and just mask out all affected bits.
20098 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
20099
20100 // The operand to BFI is already a mask suitable for removing the bits it
20101 // sets.
20102 const APInt &Mask = Op.getConstantOperandAPInt(2);
20103 Known.Zero &= Mask;
20104 Known.One &= Mask;
20105 return;
20106 }
20107 case ARMISD::VGETLANEs:
20108 case ARMISD::VGETLANEu: {
20109 const SDValue &SrcSV = Op.getOperand(0);
20110 EVT VecVT = SrcSV.getValueType();
20111 assert(VecVT.isVector() && "VGETLANE expected a vector type");
20112 const unsigned NumSrcElts = VecVT.getVectorNumElements();
20113 ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
20114 assert(Pos->getAPIntValue().ult(NumSrcElts) &&
20115 "VGETLANE index out of bounds");
20116 unsigned Idx = Pos->getZExtValue();
20117 APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
20118 Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
20119
20120 EVT VT = Op.getValueType();
20121 const unsigned DstSz = VT.getScalarSizeInBits();
20122 const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
20123 (void)SrcSz;
20124 assert(SrcSz == Known.getBitWidth());
20125 assert(DstSz > SrcSz);
20126 if (Op.getOpcode() == ARMISD::VGETLANEs)
20127 Known = Known.sext(DstSz);
20128 else {
20129 Known = Known.zext(DstSz);
20130 }
20131 assert(DstSz == Known.getBitWidth());
20132 break;
20133 }
20134 case ARMISD::VMOVrh: {
20135 KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20136 assert(KnownOp.getBitWidth() == 16);
20137 Known = KnownOp.zext(32);
20138 break;
20139 }
20140 case ARMISD::CSINC:
20141 case ARMISD::CSINV:
20142 case ARMISD::CSNEG: {
20143 KnownBits KnownOp0 = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20144 KnownBits KnownOp1 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
20145
20146 // The result is either:
20147 // CSINC: KnownOp0 or KnownOp1 + 1
20148 // CSINV: KnownOp0 or ~KnownOp1
20149 // CSNEG: KnownOp0 or KnownOp1 * -1
20150 if (Op.getOpcode() == ARMISD::CSINC)
20151 KnownOp1 = KnownBits::computeForAddSub(
20152 /*Add=*/true, /*NSW=*/false, /*NUW=*/false, KnownOp1,
20153 KnownBits::makeConstant(APInt(32, 1)));
20154 else if (Op.getOpcode() == ARMISD::CSINV)
20155 std::swap(KnownOp1.Zero, KnownOp1.One);
20156 else if (Op.getOpcode() == ARMISD::CSNEG)
20157 KnownOp1 = KnownBits::mul(
20158 KnownOp1, KnownBits::makeConstant(APInt(32, -1)));
20159
20160 Known = KnownOp0.intersectWith(KnownOp1);
20161 break;
20162 }
20163 }
20164}
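// Example (sketch): for "ldrexb r0, [r1]" the memory type is i8, so the
// intrinsic case above marks bits 8..31 of the result as known zero, letting
// later combines drop a redundant "and r0, r0, #255".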
20165
20166 bool ARMTargetLowering::targetShrinkDemandedConstant(
20167 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
20168 TargetLoweringOpt &TLO) const {
20169 // Delay optimization, so we don't have to deal with illegal types, or block
20170 // optimizations.
20171 if (!TLO.LegalOps)
20172 return false;
20173
20174 // Only optimize AND for now.
20175 if (Op.getOpcode() != ISD::AND)
20176 return false;
20177
20178 EVT VT = Op.getValueType();
20179
20180 // Ignore vectors.
20181 if (VT.isVector())
20182 return false;
20183
20184 assert(VT == MVT::i32 && "Unexpected integer type");
20185
20186 // Make sure the RHS really is a constant.
20187 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
20188 if (!C)
20189 return false;
20190
20191 unsigned Mask = C->getZExtValue();
20192
20193 unsigned Demanded = DemandedBits.getZExtValue();
20194 unsigned ShrunkMask = Mask & Demanded;
20195 unsigned ExpandedMask = Mask | ~Demanded;
20196
20197 // If the mask is all zeros, let the target-independent code replace the
20198 // result with zero.
20199 if (ShrunkMask == 0)
20200 return false;
20201
20202 // If the mask is all ones, erase the AND. (Currently, the target-independent
20203 // code won't do this, so we have to do it explicitly to avoid an infinite
20204 // loop in obscure cases.)
20205 if (ExpandedMask == ~0U)
20206 return TLO.CombineTo(Op, Op.getOperand(0));
20207
20208 auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
20209 return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
20210 };
20211 auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
20212 if (NewMask == Mask)
20213 return true;
20214 SDLoc DL(Op);
20215 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
20216 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
20217 return TLO.CombineTo(Op, NewOp);
20218 };
20219
20220 // Prefer uxtb mask.
20221 if (IsLegalMask(0xFF))
20222 return UseMask(0xFF);
20223
20224 // Prefer uxth mask.
20225 if (IsLegalMask(0xFFFF))
20226 return UseMask(0xFFFF);
20227
20228 // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
20229 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20230 if (ShrunkMask < 256)
20231 return UseMask(ShrunkMask);
20232
20233 // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
20234 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20235 if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
20236 return UseMask(ExpandedMask);
20237
20238 // Potential improvements:
20239 //
20240 // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
20241 // We could try to prefer Thumb1 immediates which can be lowered to a
20242 // two-instruction sequence.
20243 // We could try to recognize more legal ARM/Thumb2 immediates here.
20244
20245 return false;
20246}
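// Worked example (sketch): for (and x, 0x00ff00ff) where only the low 16 bits
// of the result are demanded, ShrunkMask is 0xff and ExpandedMask is
// 0xffff00ff, so IsLegalMask(0xFF) succeeds and the constant is rewritten to
// 0xff, which selects a single uxtb instead of a wide immediate AND.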
20247
20248 bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
20249 SDValue Op, const APInt &OriginalDemandedBits,
20250 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
20251 unsigned Depth) const {
20252 unsigned Opc = Op.getOpcode();
20253
20254 switch (Opc) {
20255 case ARMISD::ASRL:
20256 case ARMISD::LSRL: {
20257 // If this is result 0 and the other result is unused, see if the demand
20258 // bits allow us to shrink this long shift into a standard small shift in
20259 // the opposite direction.
20260 if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
20261 isa<ConstantSDNode>(Op->getOperand(2))) {
20262 unsigned ShAmt = Op->getConstantOperandVal(2);
20263 if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(APInt::getAllOnes(32)
20264 << (32 - ShAmt)))
20265 return TLO.CombineTo(
20266 Op, TLO.DAG.getNode(
20267 ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
20268 TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
20269 }
20270 break;
20271 }
20272 case ARMISD::VBICIMM: {
20273 SDValue Op0 = Op.getOperand(0);
20274 unsigned ModImm = Op.getConstantOperandVal(1);
20275 unsigned EltBits = 0;
20276 uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits);
20277 if ((OriginalDemandedBits & Mask) == 0)
20278 return TLO.CombineTo(Op, Op0);
20279 }
20280 }
20281
20282 return TargetLowering::SimplifyDemandedBitsForTargetNode(
20283 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
20284}
20285
20286//===----------------------------------------------------------------------===//
20287// ARM Inline Assembly Support
20288//===----------------------------------------------------------------------===//
20289
20290 bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
20291 // Looking for "rev" which is V6+.
20292 if (!Subtarget->hasV6Ops())
20293 return false;
20294
20295 InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
20296 StringRef AsmStr = IA->getAsmString();
20297 SmallVector<StringRef, 4> AsmPieces;
20298 SplitString(AsmStr, AsmPieces, ";\n");
20299
20300 switch (AsmPieces.size()) {
20301 default: return false;
20302 case 1:
20303 AsmStr = AsmPieces[0];
20304 AsmPieces.clear();
20305 SplitString(AsmStr, AsmPieces, " \t,");
20306
20307 // rev $0, $1
20308 if (AsmPieces.size() == 3 &&
20309 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
20310 IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
20311 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
20312 if (Ty && Ty->getBitWidth() == 32)
20313 return IntrinsicLowering::LowerToByteSwap(CI);
20314 }
20315 break;
20316 }
20317
20318 return false;
20319}
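// Example (sketch): inline assembly of the form
//   asm("rev %0, %1" : "=l"(out) : "l"(in));
// on a 32-bit integer matches the check above and is replaced with a call to
// llvm.bswap.i32, so the compiler can fold or schedule the byte swap itself.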
20320
20321const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
20322 // At this point, we have to lower this constraint to something else, so we
20323 // lower it to an "r" or "w". However, by doing this we will force the result
20324 // to be in register, while the X constraint is much more permissive.
20325 //
20326 // Although we are correct (we are free to emit anything, without
20327 // constraints), we might break use cases that would expect us to be more
20328 // efficient and emit something else.
20329 if (!Subtarget->hasVFP2Base())
20330 return "r";
20331 if (ConstraintVT.isFloatingPoint())
20332 return "w";
20333 if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
20334 (ConstraintVT.getSizeInBits() == 64 ||
20335 ConstraintVT.getSizeInBits() == 128))
20336 return "w";
20337
20338 return "r";
20339}
20340
20341/// getConstraintType - Given a constraint letter, return the type of
20342/// constraint it is for this target.
20343 ARMTargetLowering::ConstraintType
20344 ARMTargetLowering::getConstraintType(StringRef Constraint) const {
20345 unsigned S = Constraint.size();
20346 if (S == 1) {
20347 switch (Constraint[0]) {
20348 default: break;
20349 case 'l': return C_RegisterClass;
20350 case 'w': return C_RegisterClass;
20351 case 'h': return C_RegisterClass;
20352 case 'x': return C_RegisterClass;
20353 case 't': return C_RegisterClass;
20354 case 'j': return C_Immediate; // Constant for movw.
20355 // An address with a single base register. Due to the way we
20356 // currently handle addresses it is the same as an 'r' memory constraint.
20357 case 'Q': return C_Memory;
20358 }
20359 } else if (S == 2) {
20360 switch (Constraint[0]) {
20361 default: break;
20362 case 'T': return C_RegisterClass;
20363 // All 'U+' constraints are addresses.
20364 case 'U': return C_Memory;
20365 }
20366 }
20367 return TargetLowering::getConstraintType(Constraint);
20368}
20369
20370/// Examine constraint type and operand type and determine a weight value.
20371/// This object must already have been set up with the operand type
20372/// and the current alternative constraint selected.
20373 TargetLowering::ConstraintWeight
20374 ARMTargetLowering::getSingleConstraintMatchWeight(
20375 AsmOperandInfo &info, const char *constraint) const {
20376 ConstraintWeight weight = CW_Invalid;
20377 Value *CallOperandVal = info.CallOperandVal;
20378 // If we don't have a value, we can't do a match,
20379 // but allow it at the lowest weight.
20380 if (!CallOperandVal)
20381 return CW_Default;
20382 Type *type = CallOperandVal->getType();
20383 // Look at the constraint type.
20384 switch (*constraint) {
20385 default:
20386 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
20387 break;
20388 case 'l':
20389 if (type->isIntegerTy()) {
20390 if (Subtarget->isThumb())
20391 weight = CW_SpecificReg;
20392 else
20393 weight = CW_Register;
20394 }
20395 break;
20396 case 'w':
20397 if (type->isFloatingPointTy())
20398 weight = CW_Register;
20399 break;
20400 }
20401 return weight;
20402}
20403
20404using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
20405
20406 RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
20407 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
20408 switch (Constraint.size()) {
20409 case 1:
20410 // GCC ARM Constraint Letters
20411 switch (Constraint[0]) {
20412 case 'l': // Low regs or general regs.
20413 if (Subtarget->isThumb())
20414 return RCPair(0U, &ARM::tGPRRegClass);
20415 return RCPair(0U, &ARM::GPRRegClass);
20416 case 'h': // High regs or no regs.
20417 if (Subtarget->isThumb())
20418 return RCPair(0U, &ARM::hGPRRegClass);
20419 break;
20420 case 'r':
20421 if (Subtarget->isThumb1Only())
20422 return RCPair(0U, &ARM::tGPRRegClass);
20423 return RCPair(0U, &ARM::GPRRegClass);
20424 case 'w':
20425 if (VT == MVT::Other)
20426 break;
20427 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20428 return RCPair(0U, &ARM::SPRRegClass);
20429 if (VT.getSizeInBits() == 64)
20430 return RCPair(0U, &ARM::DPRRegClass);
20431 if (VT.getSizeInBits() == 128)
20432 return RCPair(0U, &ARM::QPRRegClass);
20433 break;
20434 case 'x':
20435 if (VT == MVT::Other)
20436 break;
20437 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20438 return RCPair(0U, &ARM::SPR_8RegClass);
20439 if (VT.getSizeInBits() == 64)
20440 return RCPair(0U, &ARM::DPR_8RegClass);
20441 if (VT.getSizeInBits() == 128)
20442 return RCPair(0U, &ARM::QPR_8RegClass);
20443 break;
20444 case 't':
20445 if (VT == MVT::Other)
20446 break;
20447 if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16)
20448 return RCPair(0U, &ARM::SPRRegClass);
20449 if (VT.getSizeInBits() == 64)
20450 return RCPair(0U, &ARM::DPR_VFP2RegClass);
20451 if (VT.getSizeInBits() == 128)
20452 return RCPair(0U, &ARM::QPR_VFP2RegClass);
20453 break;
20454 }
20455 break;
20456
20457 case 2:
20458 if (Constraint[0] == 'T') {
20459 switch (Constraint[1]) {
20460 default:
20461 break;
20462 case 'e':
20463 return RCPair(0U, &ARM::tGPREvenRegClass);
20464 case 'o':
20465 return RCPair(0U, &ARM::tGPROddRegClass);
20466 }
20467 }
20468 break;
20469
20470 default:
20471 break;
20472 }
20473
20474 if (StringRef("{cc}").equals_insensitive(Constraint))
20475 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
20476
20477 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20478}
20479
20480/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
20481/// vector. If it is invalid, don't add anything to Ops.
20482 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
20483 StringRef Constraint,
20484 std::vector<SDValue> &Ops,
20485 SelectionDAG &DAG) const {
20486 SDValue Result;
20487
20488 // Currently only support length 1 constraints.
20489 if (Constraint.size() != 1)
20490 return;
20491
20492 char ConstraintLetter = Constraint[0];
20493 switch (ConstraintLetter) {
20494 default: break;
20495 case 'j':
20496 case 'I': case 'J': case 'K': case 'L':
20497 case 'M': case 'N': case 'O':
20498 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
20499 if (!C)
20500 return;
20501
20502 int64_t CVal64 = C->getSExtValue();
20503 int CVal = (int) CVal64;
20504 // None of these constraints allow values larger than 32 bits. Check
20505 // that the value fits in an int.
20506 if (CVal != CVal64)
20507 return;
20508
20509 switch (ConstraintLetter) {
20510 case 'j':
20511 // Constant suitable for movw, must be between 0 and
20512 // 65535.
20513 if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
20514 if (CVal >= 0 && CVal <= 65535)
20515 break;
20516 return;
20517 case 'I':
20518 if (Subtarget->isThumb1Only()) {
20519 // This must be a constant between 0 and 255, for ADD
20520 // immediates.
20521 if (CVal >= 0 && CVal <= 255)
20522 break;
20523 } else if (Subtarget->isThumb2()) {
20524 // A constant that can be used as an immediate value in a
20525 // data-processing instruction.
20526 if (ARM_AM::getT2SOImmVal(CVal) != -1)
20527 break;
20528 } else {
20529 // A constant that can be used as an immediate value in a
20530 // data-processing instruction.
20531 if (ARM_AM::getSOImmVal(CVal) != -1)
20532 break;
20533 }
20534 return;
20535
20536 case 'J':
20537 if (Subtarget->isThumb1Only()) {
20538 // This must be a constant between -255 and -1, for negated ADD
20539 // immediates. This can be used in GCC with an "n" modifier that
20540 // prints the negated value, for use with SUB instructions. It is
20541 // not useful otherwise but is implemented for compatibility.
20542 if (CVal >= -255 && CVal <= -1)
20543 break;
20544 } else {
20545 // This must be a constant between -4095 and 4095. It is not clear
20546 // what this constraint is intended for. Implemented for
20547 // compatibility with GCC.
20548 if (CVal >= -4095 && CVal <= 4095)
20549 break;
20550 }
20551 return;
20552
20553 case 'K':
20554 if (Subtarget->isThumb1Only()) {
20555 // A 32-bit value where only one byte has a nonzero value. Exclude
20556 // zero to match GCC. This constraint is used by GCC internally for
20557 // constants that can be loaded with a move/shift combination.
20558 // It is not useful otherwise but is implemented for compatibility.
20559 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
20560 break;
20561 } else if (Subtarget->isThumb2()) {
20562 // A constant whose bitwise inverse can be used as an immediate
20563 // value in a data-processing instruction. This can be used in GCC
20564 // with a "B" modifier that prints the inverted value, for use with
20565 // BIC and MVN instructions. It is not useful otherwise but is
20566 // implemented for compatibility.
20567 if (ARM_AM::getT2SOImmVal(~CVal) != -1)
20568 break;
20569 } else {
20570 // A constant whose bitwise inverse can be used as an immediate
20571 // value in a data-processing instruction. This can be used in GCC
20572 // with a "B" modifier that prints the inverted value, for use with
20573 // BIC and MVN instructions. It is not useful otherwise but is
20574 // implemented for compatibility.
20575 if (ARM_AM::getSOImmVal(~CVal) != -1)
20576 break;
20577 }
20578 return;
20579
20580 case 'L':
20581 if (Subtarget->isThumb1Only()) {
20582 // This must be a constant between -7 and 7,
20583 // for 3-operand ADD/SUB immediate instructions.
20584 if (CVal >= -7 && CVal < 7)
20585 break;
20586 } else if (Subtarget->isThumb2()) {
20587 // A constant whose negation can be used as an immediate value in a
20588 // data-processing instruction. This can be used in GCC with an "n"
20589 // modifier that prints the negated value, for use with SUB
20590 // instructions. It is not useful otherwise but is implemented for
20591 // compatibility.
20592 if (ARM_AM::getT2SOImmVal(-CVal) != -1)
20593 break;
20594 } else {
20595 // A constant whose negation can be used as an immediate value in a
20596 // data-processing instruction. This can be used in GCC with an "n"
20597 // modifier that prints the negated value, for use with SUB
20598 // instructions. It is not useful otherwise but is implemented for
20599 // compatibility.
20600 if (ARM_AM::getSOImmVal(-CVal) != -1)
20601 break;
20602 }
20603 return;
20604
20605 case 'M':
20606 if (Subtarget->isThumb1Only()) {
20607 // This must be a multiple of 4 between 0 and 1020, for
20608 // ADD sp + immediate.
20609 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
20610 break;
20611 } else {
20612 // A power of two or a constant between 0 and 32. This is used in
20613 // GCC for the shift amount on shifted register operands, but it is
20614 // useful in general for any shift amounts.
20615 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
20616 break;
20617 }
20618 return;
20619
20620 case 'N':
20621 if (Subtarget->isThumb1Only()) {
20622 // This must be a constant between 0 and 31, for shift amounts.
20623 if (CVal >= 0 && CVal <= 31)
20624 break;
20625 }
20626 return;
20627
20628 case 'O':
20629 if (Subtarget->isThumb1Only()) {
20630 // This must be a multiple of 4 between -508 and 508, for
20631 // ADD/SUB sp = sp + immediate.
20632 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
20633 break;
20634 }
20635 return;
20636 }
20637 Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
20638 break;
20639 }
20640
20641 if (Result.getNode()) {
20642 Ops.push_back(Result);
20643 return;
20644 }
20645 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20646}
20647
20648 static RTLIB::Libcall getDivRemLibcall(
20649 const SDNode *N, MVT::SimpleValueType SVT) {
20650 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20651 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20652 "Unhandled Opcode in getDivRemLibcall");
20653 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20654 N->getOpcode() == ISD::SREM;
20655 RTLIB::Libcall LC;
20656 switch (SVT) {
20657 default: llvm_unreachable("Unexpected request for libcall!");
20658 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
20659 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
20660 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
20661 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
20662 }
20663 return LC;
20664}
20665
20666 static TargetLowering::ArgListTy getDivRemArgList(
20667 const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
20668 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20669 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20670 "Unhandled Opcode in getDivRemArgList");
20671 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20672 N->getOpcode() == ISD::SREM;
20673 TargetLowering::ArgListTy Args;
20674 TargetLowering::ArgListEntry Entry;
20675 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20676 EVT ArgVT = N->getOperand(i).getValueType();
20677 Type *ArgTy = ArgVT.getTypeForEVT(*Context);
20678 Entry.Node = N->getOperand(i);
20679 Entry.Ty = ArgTy;
20680 Entry.IsSExt = isSigned;
20681 Entry.IsZExt = !isSigned;
20682 Args.push_back(Entry);
20683 }
20684 if (Subtarget->isTargetWindows() && Args.size() >= 2)
20685 std::swap(Args[0], Args[1]);
20686 return Args;
20687}
20688
20689SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
20690 assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
20691 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
20692 Subtarget->isTargetWindows()) &&
20693 "Register-based DivRem lowering only");
20694 unsigned Opcode = Op->getOpcode();
20695 assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
20696 "Invalid opcode for Div/Rem lowering");
20697 bool isSigned = (Opcode == ISD::SDIVREM);
20698 EVT VT = Op->getValueType(0);
20699 SDLoc dl(Op);
20700
20701 if (VT == MVT::i64 && isa<ConstantSDNode>(Op.getOperand(1))) {
20702 SmallVector<SDValue> Result;
20703 if (expandDIVREMByConstant(Op.getNode(), Result, MVT::i32, DAG)) {
20704 SDValue Res0 =
20705 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[0], Result[1]);
20706 SDValue Res1 =
20707 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[2], Result[3]);
20708 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
20709 {Res0, Res1});
20710 }
20711 }
20712
20713 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
20714
20715 // If the target has hardware divide, use divide + multiply + subtract:
20716 // div = a / b
20717 // rem = a - b * div
20718 // return {div, rem}
20719 // This should be lowered into UDIV/SDIV + MLS later on.
20720 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
20721 : Subtarget->hasDivideInARMMode();
20722 if (hasDivide && Op->getValueType(0).isSimple() &&
20723 Op->getSimpleValueType(0) == MVT::i32) {
20724 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
20725 const SDValue Dividend = Op->getOperand(0);
20726 const SDValue Divisor = Op->getOperand(1);
20727 SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
20728 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
20729 SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
20730
20731 SDValue Values[2] = {Div, Rem};
20732 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
20733 }
20734
20735 RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
20736 VT.getSimpleVT().SimpleTy);
20737 SDValue InChain = DAG.getEntryNode();
20738
20739 TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
20740 DAG.getContext(),
20741 Subtarget);
20742
20745
20746 Type *RetTy = StructType::get(Ty, Ty);
20747
20748 if (Subtarget->isTargetWindows())
20749 InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
20750
20751 TargetLowering::CallLoweringInfo CLI(DAG);
20752 CLI.setDebugLoc(dl).setChain(InChain)
20753 .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
20754 .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
20755
20756 std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
20757 return CallInfo.first;
20758}
20759
20760// Lowers REM using divmod helpers
20761// see RTABI section 4.2/4.3
20762SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
20763 EVT VT = N->getValueType(0);
20764
20765 if (VT == MVT::i64 && isa<ConstantSDNode>(N->getOperand(1))) {
20766 SmallVector<SDValue> Result;
20767 if (expandDIVREMByConstant(N, Result, MVT::i32, DAG))
20768 return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), N->getValueType(0),
20769 Result[0], Result[1]);
20770 }
20771
20772 // Build return types (div and rem)
20773 std::vector<Type*> RetTyParams;
20774 Type *RetTyElement;
20775
20776 switch (VT.getSimpleVT().SimpleTy) {
20777 default: llvm_unreachable("Unexpected request for libcall!");
20778 case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
20779 case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
20780 case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
20781 case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
20782 }
20783
20784 RetTyParams.push_back(RetTyElement);
20785 RetTyParams.push_back(RetTyElement);
20786 ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
20787 Type *RetTy = StructType::get(*DAG.getContext(), ret);
20788
20789 RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
20790 SimpleTy);
20791 SDValue InChain = DAG.getEntryNode();
20792 TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
20793 Subtarget);
20794 bool isSigned = N->getOpcode() == ISD::SREM;
20795 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
20796 getPointerTy(DAG.getDataLayout()));
20797
20798 if (Subtarget->isTargetWindows())
20799 InChain = WinDBZCheckDenominator(DAG, N, InChain);
20800
20801 // Lower call
20802 CallLoweringInfo CLI(DAG);
20803 CLI.setChain(InChain)
20804 .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
20805 .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
20806 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
20807
20808 // Return second (rem) result operand (first contains div)
20809 SDNode *ResNode = CallResult.first.getNode();
20810 assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
20811 return ResNode->getOperand(1);
20812}
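// Example (sketch): for "srem i32 %a, %b" on an AEABI target without hardware
// divide this emits a call to __aeabi_idivmod, which returns the quotient in
// r0 and the remainder in r1; LowerREM then uses the second result.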
20813
20814SDValue
20815ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
20816 assert(Subtarget->isTargetWindows() && "unsupported target platform");
20817 SDLoc DL(Op);
20818
20819 // Get the inputs.
20820 SDValue Chain = Op.getOperand(0);
20821 SDValue Size = Op.getOperand(1);
20822
20823 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
20824 "no-stack-arg-probe")) {
20825 MaybeAlign Align =
20826 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
20827 SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20828 Chain = SP.getValue(1);
20829 SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
20830 if (Align)
20831 SP =
20832 DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
20833 DAG.getConstant(-(uint64_t)Align->value(), DL, MVT::i32));
20834 Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
20835 SDValue Ops[2] = { SP, Chain };
20836 return DAG.getMergeValues(Ops, DL);
20837 }
20838
20839 SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
20840 DAG.getConstant(2, DL, MVT::i32));
20841
20842 SDValue Glue;
20843 Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Glue);
20844 Glue = Chain.getValue(1);
20845
20846 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20847 Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Glue);
20848
20849 SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20850 Chain = NewSP.getValue(1);
20851
20852 SDValue Ops[2] = { NewSP, Chain };
20853 return DAG.getMergeValues(Ops, DL);
20854}
20855
20856SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
20857 bool IsStrict = Op->isStrictFPOpcode();
20858 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20859 const unsigned DstSz = Op.getValueType().getSizeInBits();
20860 const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
20861 assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
20862 "Unexpected type for custom-lowering FP_EXTEND");
20863
20864 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20865 "With both FP DP and 16, any FP conversion is legal!");
20866
20867 assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
20868 "With FP16, 16 to 32 conversion is legal!");
20869
20870 // Converting from 32 -> 64 is valid if we have FP64.
20871 if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
20872 // FIXME: Remove this when we have strict fp instruction selection patterns
20873 if (IsStrict) {
20874 SDLoc Loc(Op);
20875 SDValue Result = DAG.getNode(ISD::FP_EXTEND,
20876 Loc, Op.getValueType(), SrcVal);
20877 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
20878 }
20879 return Op;
20880 }
20881
20882 // Either we are converting from 16 -> 64, without FP16 and/or
20883 // FP.double-precision or without Armv8-fp. So we must do it in two
20884 // steps.
20885 // Or we are converting from 32 -> 64 without fp.double-precision or 16 -> 32
20886 // without FP16. So we must do a function call.
20887 SDLoc Loc(Op);
20888 RTLIB::Libcall LC;
20889 MakeLibCallOptions CallOptions;
20890 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20891 for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
20892 bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
20893 MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
20894 MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
20895 if (Supported) {
20896 if (IsStrict) {
20897 SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
20898 {DstVT, MVT::Other}, {Chain, SrcVal});
20899 Chain = SrcVal.getValue(1);
20900 } else {
20901 SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);
20902 }
20903 } else {
20904 LC = RTLIB::getFPEXT(SrcVT, DstVT);
20905 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20906 "Unexpected type for custom-lowering FP_EXTEND");
20907 std::tie(SrcVal, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20908 Loc, Chain);
20909 }
20910 }
20911
20912 return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
20913}
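// Example (sketch): extending f16 to f64 on a core with FP16 but no FP64 does
// a native f16->f32 convert for the first step and then a libcall (e.g.
// __aeabi_f2d) for the f32->f64 step, following the loop above.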
20914
20915SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
20916 bool IsStrict = Op->isStrictFPOpcode();
20917
20918 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20919 EVT SrcVT = SrcVal.getValueType();
20920 EVT DstVT = Op.getValueType();
20921 const unsigned DstSz = Op.getValueType().getSizeInBits();
20922 const unsigned SrcSz = SrcVT.getSizeInBits();
20923 (void)DstSz;
20924 assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
20925 "Unexpected type for custom-lowering FP_ROUND");
20926
20927 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20928 "With both FP DP and 16, any FP conversion is legal!");
20929
20930 SDLoc Loc(Op);
20931
20932 // Instruction from 32 -> 16 if hasFP16 is valid
20933 if (SrcSz == 32 && Subtarget->hasFP16())
20934 return Op;
20935
20936 // Lib call from 32 -> 16 / 64 -> [32, 16]
20937 RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
20938 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20939 "Unexpected type for custom-lowering FP_ROUND");
20940 MakeLibCallOptions CallOptions;
20941 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20942 SDValue Result;
20943 std::tie(Result, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20944 Loc, Chain);
20945 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
20946}
20947
20948bool
20949 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
20950 // The ARM target isn't yet aware of offsets.
20951 return false;
20952}
20953
20954 bool ARM::isBitFieldInvertedMask(unsigned v) {
20955 if (v == 0xffffffff)
20956 return false;
20957
20958 // there can be 1's on either or both "outsides", all the "inside"
20959 // bits must be 0's
20960 return isShiftedMask_32(~v);
20961}
20962
20963/// isFPImmLegal - Returns true if the target can instruction select the
20964/// specified FP immediate natively. If false, the legalizer will
20965/// materialize the FP immediate as a load from a constant pool.
20966 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
20967 bool ForCodeSize) const {
20968 if (!Subtarget->hasVFP3Base())
20969 return false;
20970 if (VT == MVT::f16 && Subtarget->hasFullFP16())
20971 return ARM_AM::getFP16Imm(Imm) != -1;
20972 if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
20973 ARM_AM::getFP32FP16Imm(Imm) != -1)
20974 return true;
20975 if (VT == MVT::f32)
20976 return ARM_AM::getFP32Imm(Imm) != -1;
20977 if (VT == MVT::f64 && Subtarget->hasFP64())
20978 return ARM_AM::getFP64Imm(Imm) != -1;
20979 return false;
20980}
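// Example (sketch): with VFPv3, 1.0f fits the 8-bit VFP immediate encoding and
// can be materialized as "vmov.f32 s0, #1.0", whereas 0.1f cannot and is
// loaded from the constant pool instead.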
20981
20982/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
20983/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
20984/// specified in the intrinsic calls.
20985 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
20986 const CallInst &I,
20987 MachineFunction &MF,
20988 unsigned Intrinsic) const {
20989 switch (Intrinsic) {
20990 case Intrinsic::arm_neon_vld1:
20991 case Intrinsic::arm_neon_vld2:
20992 case Intrinsic::arm_neon_vld3:
20993 case Intrinsic::arm_neon_vld4:
20994 case Intrinsic::arm_neon_vld2lane:
20995 case Intrinsic::arm_neon_vld3lane:
20996 case Intrinsic::arm_neon_vld4lane:
20997 case Intrinsic::arm_neon_vld2dup:
20998 case Intrinsic::arm_neon_vld3dup:
20999 case Intrinsic::arm_neon_vld4dup: {
21001 // Conservatively set memVT to the entire set of vectors loaded.
21002 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21003 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
21004 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21005 Info.ptrVal = I.getArgOperand(0);
21006 Info.offset = 0;
21007 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
21008 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
21009 // volatile loads with NEON intrinsics not supported
21011 return true;
21012 }
21013 case Intrinsic::arm_neon_vld1x2:
21014 case Intrinsic::arm_neon_vld1x3:
21015 case Intrinsic::arm_neon_vld1x4: {
21017 // Conservatively set memVT to the entire set of vectors loaded.
21018 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21019 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
21020 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21021 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
21022 Info.offset = 0;
21023 Info.align.reset();
21024 // volatile loads with NEON intrinsics not supported
21026 return true;
21027 }
21028 case Intrinsic::arm_neon_vst1:
21029 case Intrinsic::arm_neon_vst2:
21030 case Intrinsic::arm_neon_vst3:
21031 case Intrinsic::arm_neon_vst4:
21032 case Intrinsic::arm_neon_vst2lane:
21033 case Intrinsic::arm_neon_vst3lane:
21034 case Intrinsic::arm_neon_vst4lane: {
21036 // Conservatively set memVT to the entire set of vectors stored.
21037 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21038 unsigned NumElts = 0;
21039 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
21040 Type *ArgTy = I.getArgOperand(ArgI)->getType();
21041 if (!ArgTy->isVectorTy())
21042 break;
21043 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
21044 }
21045 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21046 Info.ptrVal = I.getArgOperand(0);
21047 Info.offset = 0;
21048 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
21049 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
21050 // volatile stores with NEON intrinsics not supported
21052 return true;
21053 }
21054 case Intrinsic::arm_neon_vst1x2:
21055 case Intrinsic::arm_neon_vst1x3:
21056 case Intrinsic::arm_neon_vst1x4: {
21058 // Conservatively set memVT to the entire set of vectors stored.
21059 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21060 unsigned NumElts = 0;
21061 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
21062 Type *ArgTy = I.getArgOperand(ArgI)->getType();
21063 if (!ArgTy->isVectorTy())
21064 break;
21065 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
21066 }
21067 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21068 Info.ptrVal = I.getArgOperand(0);
21069 Info.offset = 0;
21070 Info.align.reset();
21071 // volatile stores with NEON intrinsics not supported
21073 return true;
21074 }
21075 case Intrinsic::arm_mve_vld2q:
21076 case Intrinsic::arm_mve_vld4q: {
21078 // Conservatively set memVT to the entire set of vectors loaded.
21079 Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
21080 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;
21081 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21082 Info.ptrVal = I.getArgOperand(0);
21083 Info.offset = 0;
21084 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21085 // volatile loads with MVE intrinsics not supported
21087 return true;
21088 }
21089 case Intrinsic::arm_mve_vst2q:
21090 case Intrinsic::arm_mve_vst4q: {
21092 // Conservatively set memVT to the entire set of vectors stored.
21093 Type *VecTy = I.getArgOperand(1)->getType();
21094 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;
21095 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21096 Info.ptrVal = I.getArgOperand(0);
21097 Info.offset = 0;
21098 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21099 // volatile stores with MVE intrinsics not supported
21101 return true;
21102 }
21103 case Intrinsic::arm_mve_vldr_gather_base:
21104 case Intrinsic::arm_mve_vldr_gather_base_predicated: {
21106 Info.ptrVal = nullptr;
21107 Info.memVT = MVT::getVT(I.getType());
21108 Info.align = Align(1);
21110 return true;
21111 }
21112 case Intrinsic::arm_mve_vldr_gather_base_wb:
21113 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
21115 Info.ptrVal = nullptr;
21116 Info.memVT = MVT::getVT(I.getType()->getContainedType(0));
21117 Info.align = Align(1);
21119 return true;
21120 }
21121 case Intrinsic::arm_mve_vldr_gather_offset:
21122 case Intrinsic::arm_mve_vldr_gather_offset_predicated: {
21124 Info.ptrVal = nullptr;
21125 MVT DataVT = MVT::getVT(I.getType());
21126 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
21127 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21128 DataVT.getVectorNumElements());
21129 Info.align = Align(1);
21131 return true;
21132 }
21133 case Intrinsic::arm_mve_vstr_scatter_base:
21134 case Intrinsic::arm_mve_vstr_scatter_base_predicated: {
21136 Info.ptrVal = nullptr;
21137 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21138 Info.align = Align(1);
21140 return true;
21141 }
21142 case Intrinsic::arm_mve_vstr_scatter_base_wb:
21143 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: {
21145 Info.ptrVal = nullptr;
21146 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21147 Info.align = Align(1);
21149 return true;
21150 }
21151 case Intrinsic::arm_mve_vstr_scatter_offset:
21152 case Intrinsic::arm_mve_vstr_scatter_offset_predicated: {
21154 Info.ptrVal = nullptr;
21155 MVT DataVT = MVT::getVT(I.getArgOperand(2)->getType());
21156 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
21157 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21158 DataVT.getVectorNumElements());
21159 Info.align = Align(1);
21161 return true;
21162 }
21163 case Intrinsic::arm_ldaex:
21164 case Intrinsic::arm_ldrex: {
21165 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21166 Type *ValTy = I.getParamElementType(0);
21168 Info.memVT = MVT::getVT(ValTy);
21169 Info.ptrVal = I.getArgOperand(0);
21170 Info.offset = 0;
21171 Info.align = DL.getABITypeAlign(ValTy);
21173 return true;
21174 }
21175 case Intrinsic::arm_stlex:
21176 case Intrinsic::arm_strex: {
21177 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21178 Type *ValTy = I.getParamElementType(1);
21180 Info.memVT = MVT::getVT(ValTy);
21181 Info.ptrVal = I.getArgOperand(1);
21182 Info.offset = 0;
21183 Info.align = DL.getABITypeAlign(ValTy);
21185 return true;
21186 }
21187 case Intrinsic::arm_stlexd:
21188 case Intrinsic::arm_strexd:
21190 Info.memVT = MVT::i64;
21191 Info.ptrVal = I.getArgOperand(2);
21192 Info.offset = 0;
21193 Info.align = Align(8);
21195 return true;
21196
21197 case Intrinsic::arm_ldaexd:
21198 case Intrinsic::arm_ldrexd:
21200 Info.memVT = MVT::i64;
21201 Info.ptrVal = I.getArgOperand(0);
21202 Info.offset = 0;
21203 Info.align = Align(8);
21205 return true;
21206
21207 default:
21208 break;
21209 }
21210
21211 return false;
21212}
21213
21214/// Returns true if it is beneficial to convert a load of a constant
21215/// to just the constant itself.
21217 Type *Ty) const {
21218 assert(Ty->isIntegerTy());
21219
21220 unsigned Bits = Ty->getPrimitiveSizeInBits();
21221 if (Bits == 0 || Bits > 32)
21222 return false;
21223 return true;
21224}
21225
21227 unsigned Index) const {
21229 return false;
21230
21231 return (Index == 0 || Index == ResVT.getVectorNumElements());
21232}
21233
21235 ARM_MB::MemBOpt Domain) const {
21236 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21237
21238 // First, if the target has no DMB, see what fallback we can use.
21239 if (!Subtarget->hasDataBarrier()) {
21240 // Some ARMv6 cpus can support data barriers with an mcr instruction.
21241 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
21242 // here.
21243 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
21244 Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
21245 Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
21246 Builder.getInt32(0), Builder.getInt32(7),
21247 Builder.getInt32(10), Builder.getInt32(5)};
21248 return Builder.CreateCall(MCR, args);
21249 } else {
21250 // Instead of using barriers, atomic accesses on these subtargets use
21251 // libcalls.
21252 llvm_unreachable("makeDMB on a target so old that it has no barriers");
21253 }
21254 } else {
21255 Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
21256 // Only a full system barrier exists in the M-class architectures.
21257 Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
21258 Constant *CDomain = Builder.getInt32(Domain);
21259 return Builder.CreateCall(DMB, CDomain);
21260 }
21261}
21262
21263// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
21265 Instruction *Inst,
21266 AtomicOrdering Ord) const {
21267 switch (Ord) {
21270 llvm_unreachable("Invalid fence: unordered/non-atomic");
21273 return nullptr; // Nothing to do
21275 if (!Inst->hasAtomicStore())
21276 return nullptr; // Nothing to do
21277 [[fallthrough]];
21280 if (Subtarget->preferISHSTBarriers())
21281 return makeDMB(Builder, ARM_MB::ISHST);
21282 // FIXME: add a comment with a link to documentation justifying this.
21283 else
21284 return makeDMB(Builder, ARM_MB::ISH);
21285 }
21286 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
21287}
21288
21290 Instruction *Inst,
21291 AtomicOrdering Ord) const {
21292 switch (Ord) {
21295 llvm_unreachable("Invalid fence: unordered/not-atomic");
21298 return nullptr; // Nothing to do
21302 return makeDMB(Builder, ARM_MB::ISH);
21303 }
21304 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
21305}
21306
21307// Loads and stores less than 64-bits are already atomic; ones above that
21308// are doomed anyway, so defer to the default libcall and blame the OS when
21309// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21310// anything for those.
21313 bool has64BitAtomicStore;
21314 if (Subtarget->isMClass())
21315 has64BitAtomicStore = false;
21316 else if (Subtarget->isThumb())
21317 has64BitAtomicStore = Subtarget->hasV7Ops();
21318 else
21319 has64BitAtomicStore = Subtarget->hasV6Ops();
21320
21321 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
21322 return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand
21324}
21325
21326// Loads and stores less than 64-bits are already atomic; ones above that
21327// are doomed anyway, so defer to the default libcall and blame the OS when
21328// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21329// anything for those.
21330// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
21331// guarantee, see DDI0406C ARM architecture reference manual,
21332// sections A8.8.72-74 LDRD)
21335 bool has64BitAtomicLoad;
21336 if (Subtarget->isMClass())
21337 has64BitAtomicLoad = false;
21338 else if (Subtarget->isThumb())
21339 has64BitAtomicLoad = Subtarget->hasV7Ops();
21340 else
21341 has64BitAtomicLoad = Subtarget->hasV6Ops();
21342
21343 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
21344 return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly
21346}
21347
21348// For the real atomic operations, we have ldrex/strex up to 32 bits,
21349// and up to 64 bits on the non-M profiles
21352 if (AI->isFloatingPointOperation())
21354
21355 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21356 bool hasAtomicRMW;
21357 if (Subtarget->isMClass())
21358 hasAtomicRMW = Subtarget->hasV8MBaselineOps();
21359 else if (Subtarget->isThumb())
21360 hasAtomicRMW = Subtarget->hasV7Ops();
21361 else
21362 hasAtomicRMW = Subtarget->hasV6Ops();
21363 if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
21364 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21365 // implement atomicrmw without spilling. If the target address is also on
21366 // the stack and close enough to the spill slot, this can lead to a
21367 // situation where the monitor always gets cleared and the atomic operation
21368 // can never succeed. So at -O0 lower this operation to a CAS loop.
21369 if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None)
21372 }
21374}
21375
21376// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
21377// bits, and up to 64 bits on the non-M profiles.
21380 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21381 // implement cmpxchg without spilling. If the address being exchanged is also
21382 // on the stack and close enough to the spill slot, this can lead to a
21383 // situation where the monitor always gets cleared and the atomic operation
21384 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
21385 unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
21386 bool HasAtomicCmpXchg;
21387 if (Subtarget->isMClass())
21388 HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
21389 else if (Subtarget->isThumb())
21390 HasAtomicCmpXchg = Subtarget->hasV7Ops();
21391 else
21392 HasAtomicCmpXchg = Subtarget->hasV6Ops();
21393 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None &&
21394 HasAtomicCmpXchg && Size <= (Subtarget->isMClass() ? 32U : 64U))
21397}
21398
21400 const Instruction *I) const {
21401 return InsertFencesForAtomic;
21402}
21403
21405 // ROPI/RWPI are not supported currently.
21406 return !Subtarget->isROPI() && !Subtarget->isRWPI();
21407}
21408
21410 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21412
21413 // MSVC CRT has a global variable holding security cookie.
21414 M.getOrInsertGlobal("__security_cookie",
21415 PointerType::getUnqual(M.getContext()));
21416
21417 // MSVC CRT has a function to validate security cookie.
21418 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
21419 "__security_check_cookie", Type::getVoidTy(M.getContext()),
21420 PointerType::getUnqual(M.getContext()));
21421 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
21422 F->addParamAttr(0, Attribute::AttrKind::InReg);
21423}
21424
21426 // MSVC CRT has a global variable holding security cookie.
21427 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21428 return M.getGlobalVariable("__security_cookie");
21430}
21431
21433 // MSVC CRT has a function to validate security cookie.
21434 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21435 return M.getFunction("__security_check_cookie");
21437}
21438
21440 unsigned &Cost) const {
21441 // If we do not have NEON, vector types are not natively supported.
21442 if (!Subtarget->hasNEON())
21443 return false;
21444
21445 // Floating point values and vector values map to the same register file.
21446 // Therefore, although we could do a store extract of a vector type, this is
21447 // better to leave at float as we have more freedom in the addressing mode for
21448 // those.
21449 if (VectorTy->isFPOrFPVectorTy())
21450 return false;
21451
21452 // If the index is unknown at compile time, this is very expensive to lower
21453 // and it is not possible to combine the store with the extract.
21454 if (!isa<ConstantInt>(Idx))
21455 return false;
21456
21457 assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
21458 unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedValue();
21459 // We can do a store + vector extract on any vector that fits perfectly in a D
21460 // or Q register.
21461 if (BitWidth == 64 || BitWidth == 128) {
21462 Cost = 0;
21463 return true;
21464 }
21465 return false;
21466}
21467
21469 return Subtarget->hasV6T2Ops();
21470}
21471
21473 return Subtarget->hasV6T2Ops();
21474}
21475
21477 const Instruction &AndI) const {
21478 if (!Subtarget->hasV7Ops())
21479 return false;
21480
21481 // Sink the `and` instruction only if the mask would fit into a modified
21482 // immediate operand.
21483 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
21484 if (!Mask || Mask->getValue().getBitWidth() > 32u)
21485 return false;
21486 auto MaskVal = unsigned(Mask->getValue().getZExtValue());
21487 return (Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal)
21488 : ARM_AM::getSOImmVal(MaskVal)) != -1;
21489}
21490
21493 SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const {
21494 if (Subtarget->hasMinSize() && !Subtarget->isTargetWindows())
21497 ExpansionFactor);
21498}
21499
21501 Value *Addr,
21502 AtomicOrdering Ord) const {
21503 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21504 bool IsAcquire = isAcquireOrStronger(Ord);
21505
21506 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
21507 // intrinsic must return {i32, i32} and we have to recombine them into a
21508 // single i64 here.
21509 if (ValueTy->getPrimitiveSizeInBits() == 64) {
21511 IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
21513
21514 Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
21515
21516 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
21517 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
21518 if (!Subtarget->isLittle())
21519 std::swap (Lo, Hi);
21520 Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
21521 Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
21522 return Builder.CreateOr(
21523 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 32)), "val64");
21524 }
21525
21526 Type *Tys[] = { Addr->getType() };
21527 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
21528 Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
21529 CallInst *CI = Builder.CreateCall(Ldrex, Addr);
21530
21531 CI->addParamAttr(
21532 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
21533 return Builder.CreateTruncOrBitCast(CI, ValueTy);
21534}
21535
21537 IRBuilderBase &Builder) const {
21538 if (!Subtarget->hasV7Ops())
21539 return;
21540 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21541 Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
21542}
21543
21545 Value *Val, Value *Addr,
21546 AtomicOrdering Ord) const {
21547 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21548 bool IsRelease = isReleaseOrStronger(Ord);
21549
21550 // Since the intrinsics must have legal type, the i64 intrinsics take two
21551 // parameters: "i32, i32". We must marshal Val into the appropriate form
21552 // before the call.
21553 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
21555 IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
21557 Type *Int32Ty = Type::getInt32Ty(M->getContext());
21558
21559 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
21560 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
21561 if (!Subtarget->isLittle())
21562 std::swap(Lo, Hi);
21563 return Builder.CreateCall(Strex, {Lo, Hi, Addr});
21564 }
21565
21566 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
21567 Type *Tys[] = { Addr->getType() };
21568 Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
21569
21570 CallInst *CI = Builder.CreateCall(
21571 Strex, {Builder.CreateZExtOrBitCast(
21572 Val, Strex->getFunctionType()->getParamType(0)),
21573 Addr});
21574 CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,
21575 Val->getType()));
21576 return CI;
21577}
21578
21579
21581 return Subtarget->isMClass();
21582}
21583
21584/// A helper function for determining the number of interleaved accesses we
21585/// will generate when lowering accesses of the given type.
21586unsigned
21588 const DataLayout &DL) const {
21589 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
21590}
21591
21593 unsigned Factor, FixedVectorType *VecTy, Align Alignment,
21594 const DataLayout &DL) const {
21595
21596 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
21597 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
21598
21599 if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
21600 return false;
21601
21602 // Ensure the vector doesn't have f16 elements. Even though we could do an
21603 // i16 vldN, we can't hold the f16 vectors and will end up converting via
21604 // f32.
21605 if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
21606 return false;
21607 if (Subtarget->hasMVEIntegerOps() && Factor == 3)
21608 return false;
21609
21610 // Ensure the number of vector elements is greater than 1.
21611 if (VecTy->getNumElements() < 2)
21612 return false;
21613
21614 // Ensure the element type is legal.
21615 if (ElSize != 8 && ElSize != 16 && ElSize != 32)
21616 return false;
21617 // And the alignment if high enough under MVE.
21618 if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)
21619 return false;
21620
21621 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
21622 // 128 will be split into multiple interleaved accesses.
21623 if (Subtarget->hasNEON() && VecSize == 64)
21624 return true;
21625 return VecSize % 128 == 0;
21626}
21627
21629 if (Subtarget->hasNEON())
21630 return 4;
21631 if (Subtarget->hasMVEIntegerOps())
21634}
21635
21636/// Lower an interleaved load into a vldN intrinsic.
21637///
21638/// E.g. Lower an interleaved load (Factor = 2):
21639/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
21640/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21641/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21642///
21643/// Into:
21644/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
21645/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
21646/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
21649 ArrayRef<unsigned> Indices, unsigned Factor) const {
21650 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21651 "Invalid interleave factor");
21652 assert(!Shuffles.empty() && "Empty shufflevector input");
21653 assert(Shuffles.size() == Indices.size() &&
21654 "Unmatched number of shufflevectors and indices");
21655
21656 auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());
21657 Type *EltTy = VecTy->getElementType();
21658
21659 const DataLayout &DL = LI->getModule()->getDataLayout();
21660 Align Alignment = LI->getAlign();
21661
21662 // Skip if we do not have NEON and skip illegal vector types. We can
21663 // "legalize" wide vector types into multiple interleaved accesses as long as
21664 // the vector types are divisible by 128.
21665 if (!isLegalInterleavedAccessType(Factor, VecTy, Alignment, DL))
21666 return false;
21667
21668 unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
21669
21670 // A pointer vector can not be the return type of the ldN intrinsics. Need to
21671 // load integer vectors first and then convert to pointer vectors.
21672 if (EltTy->isPointerTy())
21673 VecTy = FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy);
21674
21675 IRBuilder<> Builder(LI);
21676
21677 // The base address of the load.
21678 Value *BaseAddr = LI->getPointerOperand();
21679
21680 if (NumLoads > 1) {
21681 // If we're going to generate more than one load, reset the sub-vector type
21682 // to something legal.
21683 VecTy = FixedVectorType::get(VecTy->getElementType(),
21684 VecTy->getNumElements() / NumLoads);
21685 }
21686
21687 assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
21688
21689 auto createLoadIntrinsic = [&](Value *BaseAddr) {
21690 if (Subtarget->hasNEON()) {
21691 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21692 Type *Tys[] = {VecTy, PtrTy};
21693 static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
21694 Intrinsic::arm_neon_vld3,
21695 Intrinsic::arm_neon_vld4};
21696 Function *VldnFunc =
21697 Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
21698
21700 Ops.push_back(BaseAddr);
21701 Ops.push_back(Builder.getInt32(LI->getAlign().value()));
21702
21703 return Builder.CreateCall(VldnFunc, Ops, "vldN");
21704 } else {
21705 assert((Factor == 2 || Factor == 4) &&
21706 "expected interleave factor of 2 or 4 for MVE");
21707 Intrinsic::ID LoadInts =
21708 Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
21709 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21710 Type *Tys[] = {VecTy, PtrTy};
21711 Function *VldnFunc =
21712 Intrinsic::getDeclaration(LI->getModule(), LoadInts, Tys);
21713
21715 Ops.push_back(BaseAddr);
21716 return Builder.CreateCall(VldnFunc, Ops, "vldN");
21717 }
21718 };
21719
21720 // Holds sub-vectors extracted from the load intrinsic return values. The
21721 // sub-vectors are associated with the shufflevector instructions they will
21722 // replace.
21724
21725 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
21726 // If we're generating more than one load, compute the base address of
21727 // subsequent loads as an offset from the previous.
21728 if (LoadCount > 0)
21729 BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
21730 VecTy->getNumElements() * Factor);
21731
21732 CallInst *VldN = createLoadIntrinsic(BaseAddr);
21733
21734 // Replace uses of each shufflevector with the corresponding vector loaded
21735 // by ldN.
21736 for (unsigned i = 0; i < Shuffles.size(); i++) {
21737 ShuffleVectorInst *SV = Shuffles[i];
21738 unsigned Index = Indices[i];
21739
21740 Value *SubVec = Builder.CreateExtractValue(VldN, Index);
21741
21742 // Convert the integer vector to pointer vector if the element is pointer.
21743 if (EltTy->isPointerTy())
21744 SubVec = Builder.CreateIntToPtr(
21745 SubVec,
21747
21748 SubVecs[SV].push_back(SubVec);
21749 }
21750 }
21751
21752 // Replace uses of the shufflevector instructions with the sub-vectors
21753 // returned by the load intrinsic. If a shufflevector instruction is
21754 // associated with more than one sub-vector, those sub-vectors will be
21755 // concatenated into a single wide vector.
21756 for (ShuffleVectorInst *SVI : Shuffles) {
21757 auto &SubVec = SubVecs[SVI];
21758 auto *WideVec =
21759 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
21760 SVI->replaceAllUsesWith(WideVec);
21761 }
21762
21763 return true;
21764}
21765
21766/// Lower an interleaved store into a vstN intrinsic.
21767///
21768/// E.g. Lower an interleaved store (Factor = 3):
21769/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21770/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21771/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
21772///
21773/// Into:
21774/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21775/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21776/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21777/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21778///
21779/// Note that the new shufflevectors will be removed and we'll only generate one
21780/// vst3 instruction in CodeGen.
21781///
21782/// Example for a more general valid mask (Factor 3). Lower:
21783/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
21784/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
21785/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21786///
21787/// Into:
21788/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
21789/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
21790/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
21791/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21793 ShuffleVectorInst *SVI,
21794 unsigned Factor) const {
21795 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21796 "Invalid interleave factor");
21797
21798 auto *VecTy = cast<FixedVectorType>(SVI->getType());
21799 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
21800
21801 unsigned LaneLen = VecTy->getNumElements() / Factor;
21802 Type *EltTy = VecTy->getElementType();
21803 auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
21804
21805 const DataLayout &DL = SI->getModule()->getDataLayout();
21806 Align Alignment = SI->getAlign();
21807
21808 // Skip if we do not have NEON and skip illegal vector types. We can
21809 // "legalize" wide vector types into multiple interleaved accesses as long as
21810 // the vector types are divisible by 128.
21811 if (!isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
21812 return false;
21813
21814 unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
21815
21816 Value *Op0 = SVI->getOperand(0);
21817 Value *Op1 = SVI->getOperand(1);
21818 IRBuilder<> Builder(SI);
21819
21820 // StN intrinsics don't support pointer vectors as arguments. Convert pointer
21821 // vectors to integer vectors.
21822 if (EltTy->isPointerTy()) {
21823 Type *IntTy = DL.getIntPtrType(EltTy);
21824
21825 // Convert to the corresponding integer vector.
21826 auto *IntVecTy =
21827 FixedVectorType::get(IntTy, cast<FixedVectorType>(Op0->getType()));
21828 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
21829 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
21830
21831 SubVecTy = FixedVectorType::get(IntTy, LaneLen);
21832 }
21833
21834 // The base address of the store.
21835 Value *BaseAddr = SI->getPointerOperand();
21836
21837 if (NumStores > 1) {
21838 // If we're going to generate more than one store, reset the lane length
21839 // and sub-vector type to something legal.
21840 LaneLen /= NumStores;
21841 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
21842 }
21843
21844 assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
21845
21846 auto Mask = SVI->getShuffleMask();
21847
21848 auto createStoreIntrinsic = [&](Value *BaseAddr,
21849 SmallVectorImpl<Value *> &Shuffles) {
21850 if (Subtarget->hasNEON()) {
21851 static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
21852 Intrinsic::arm_neon_vst3,
21853 Intrinsic::arm_neon_vst4};
21854 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21855 Type *Tys[] = {PtrTy, SubVecTy};
21856
21858 SI->getModule(), StoreInts[Factor - 2], Tys);
21859
21861 Ops.push_back(BaseAddr);
21862 append_range(Ops, Shuffles);
21863 Ops.push_back(Builder.getInt32(SI->getAlign().value()));
21864 Builder.CreateCall(VstNFunc, Ops);
21865 } else {
21866 assert((Factor == 2 || Factor == 4) &&
21867 "expected interleave factor of 2 or 4 for MVE");
21868 Intrinsic::ID StoreInts =
21869 Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
21870 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21871 Type *Tys[] = {PtrTy, SubVecTy};
21872 Function *VstNFunc =
21873 Intrinsic::getDeclaration(SI->getModule(), StoreInts, Tys);
21874
21876 Ops.push_back(BaseAddr);
21877 append_range(Ops, Shuffles);
21878 for (unsigned F = 0; F < Factor; F++) {
21879 Ops.push_back(Builder.getInt32(F));
21880 Builder.CreateCall(VstNFunc, Ops);
21881 Ops.pop_back();
21882 }
21883 }
21884 };
21885
21886 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
21887 // If we generating more than one store, we compute the base address of
21888 // subsequent stores as an offset from the previous.
21889 if (StoreCount > 0)
21890 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
21891 BaseAddr, LaneLen * Factor);
21892
21893 SmallVector<Value *, 4> Shuffles;
21894
21895 // Split the shufflevector operands into sub vectors for the new vstN call.
21896 for (unsigned i = 0; i < Factor; i++) {
21897 unsigned IdxI = StoreCount * LaneLen * Factor + i;
21898 if (Mask[IdxI] >= 0) {
21899 Shuffles.push_back(Builder.CreateShuffleVector(
21900 Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
21901 } else {
21902 unsigned StartMask = 0;
21903 for (unsigned j = 1; j < LaneLen; j++) {
21904 unsigned IdxJ = StoreCount * LaneLen * Factor + j;
21905 if (Mask[IdxJ * Factor + IdxI] >= 0) {
21906 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
21907 break;
21908 }
21909 }
21910 // Note: If all elements in a chunk are undefs, StartMask=0!
21911 // Note: Filling undef gaps with random elements is ok, since
21912 // those elements were being written anyway (with undefs).
21913 // In the case of all undefs we're defaulting to using elems from 0
21914 // Note: StartMask cannot be negative, it's checked in
21915 // isReInterleaveMask
21916 Shuffles.push_back(Builder.CreateShuffleVector(
21917 Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
21918 }
21919 }
21920
21921 createStoreIntrinsic(BaseAddr, Shuffles);
21922 }
21923 return true;
21924}
21925
21933
21935 uint64_t &Members) {
21936 if (auto *ST = dyn_cast<StructType>(Ty)) {
21937 for (unsigned i = 0; i < ST->getNumElements(); ++i) {
21938 uint64_t SubMembers = 0;
21939 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
21940 return false;
21941 Members += SubMembers;
21942 }
21943 } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
21944 uint64_t SubMembers = 0;
21945 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
21946 return false;
21947 Members += SubMembers * AT->getNumElements();
21948 } else if (Ty->isFloatTy()) {
21949 if (Base != HA_UNKNOWN && Base != HA_FLOAT)
21950 return false;
21951 Members = 1;
21952 Base = HA_FLOAT;
21953 } else if (Ty->isDoubleTy()) {
21954 if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
21955 return false;
21956 Members = 1;
21957 Base = HA_DOUBLE;
21958 } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
21959 Members = 1;
21960 switch (Base) {
21961 case HA_FLOAT:
21962 case HA_DOUBLE:
21963 return false;
21964 case HA_VECT64:
21965 return VT->getPrimitiveSizeInBits().getFixedValue() == 64;
21966 case HA_VECT128:
21967 return VT->getPrimitiveSizeInBits().getFixedValue() == 128;
21968 case HA_UNKNOWN:
21969 switch (VT->getPrimitiveSizeInBits().getFixedValue()) {
21970 case 64:
21971 Base = HA_VECT64;
21972 return true;
21973 case 128:
21974 Base = HA_VECT128;
21975 return true;
21976 default:
21977 return false;
21978 }
21979 }
21980 }
21981
21982 return (Members > 0 && Members <= 4);
21983}
21984
21985/// Return the correct alignment for the current calling convention.
21987 Type *ArgTy, const DataLayout &DL) const {
21988 const Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
21989 if (!ArgTy->isVectorTy())
21990 return ABITypeAlign;
21991
21992 // Avoid over-aligning vector parameters. It would require realigning the
21993 // stack and waste space for no real benefit.
21994 return std::min(ABITypeAlign, DL.getStackAlignment());
21995}
21996
21997/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
21998/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
21999/// passing according to AAPCS rules.
22001 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
22002 const DataLayout &DL) const {
22003 if (getEffectiveCallingConv(CallConv, isVarArg) !=
22005 return false;
22006
22008 uint64_t Members = 0;
22009 bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
22010 LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
22011
22012 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
22013 return IsHA || IsIntArray;
22014}
22015
22017 const Constant *PersonalityFn) const {
22018 // Platforms which do not use SjLj EH may return values in these registers
22019 // via the personality function.
22020 return Subtarget->useSjLjEH() ? Register() : ARM::R0;
22021}
22022
22024 const Constant *PersonalityFn) const {
22025 // Platforms which do not use SjLj EH may return values in these registers
22026 // via the personality function.
22027 return Subtarget->useSjLjEH() ? Register() : ARM::R1;
22028}
22029
22030void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
22031 // Update IsSplitCSR in ARMFunctionInfo.
22032 ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
22033 AFI->setIsSplitCSR(true);
22034}
22035
22036void ARMTargetLowering::insertCopiesSplitCSR(
22037 MachineBasicBlock *Entry,
22038 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
22039 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
22040 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
22041 if (!IStart)
22042 return;
22043
22044 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
22045 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
22046 MachineBasicBlock::iterator MBBI = Entry->begin();
22047 for (const MCPhysReg *I = IStart; *I; ++I) {
22048 const TargetRegisterClass *RC = nullptr;
22049 if (ARM::GPRRegClass.contains(*I))
22050 RC = &ARM::GPRRegClass;
22051 else if (ARM::DPRRegClass.contains(*I))
22052 RC = &ARM::DPRRegClass;
22053 else
22054 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
22055
22056 Register NewVR = MRI->createVirtualRegister(RC);
22057 // Create copy from CSR to a virtual register.
22058 // FIXME: this currently does not emit CFI pseudo-instructions, it works
22059 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
22060 // nounwind. If we want to generalize this later, we may need to emit
22061 // CFI pseudo-instructions.
22062 assert(Entry->getParent()->getFunction().hasFnAttribute(
22063 Attribute::NoUnwind) &&
22064 "Function should be nounwind in insertCopiesSplitCSR!");
22065 Entry->addLiveIn(*I);
22066 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
22067 .addReg(*I);
22068
22069 // Insert the copy-back instructions right before the terminator.
22070 for (auto *Exit : Exits)
22071 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
22072 TII->get(TargetOpcode::COPY), *I)
22073 .addReg(NewVR);
22074 }
22075}
22076
22080}
22081
22083 return Subtarget->hasMVEIntegerOps();
22084}
22085
22088 auto *VTy = dyn_cast<FixedVectorType>(Ty);
22089 if (!VTy)
22090 return false;
22091
22092 auto *ScalarTy = VTy->getScalarType();
22093 unsigned NumElements = VTy->getNumElements();
22094
22095 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
22096 if (VTyWidth < 128 || !llvm::isPowerOf2_32(VTyWidth))
22097 return false;
22098
22099 // Both VCADD and VCMUL/VCMLA support the same types, F16 and F32
22100 if (ScalarTy->isHalfTy() || ScalarTy->isFloatTy())
22101 return Subtarget->hasMVEFloatOps();
22102
22104 return false;
22105
22106 return Subtarget->hasMVEIntegerOps() &&
22107 (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
22108 ScalarTy->isIntegerTy(32));
22109}
22110
22113 ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
22114 Value *Accumulator) const {
22115
22116 FixedVectorType *Ty = cast<FixedVectorType>(InputA->getType());
22117
22118 unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();
22119
22120 assert(TyWidth >= 128 && "Width of vector type must be at least 128 bits");
22121
22122 if (TyWidth > 128) {
22123 int Stride = Ty->getNumElements() / 2;
22124 auto SplitSeq = llvm::seq<int>(0, Ty->getNumElements());
22125 auto SplitSeqVec = llvm::to_vector(SplitSeq);
22126 ArrayRef<int> LowerSplitMask(&SplitSeqVec[0], Stride);
22127 ArrayRef<int> UpperSplitMask(&SplitSeqVec[Stride], Stride);
22128
22129 auto *LowerSplitA = B.CreateShuffleVector(InputA, LowerSplitMask);
22130 auto *LowerSplitB = B.CreateShuffleVector(InputB, LowerSplitMask);
22131 auto *UpperSplitA = B.CreateShuffleVector(InputA, UpperSplitMask);
22132 auto *UpperSplitB = B.CreateShuffleVector(InputB, UpperSplitMask);
22133 Value *LowerSplitAcc = nullptr;
22134 Value *UpperSplitAcc = nullptr;
22135
22136 if (Accumulator) {
22137 LowerSplitAcc = B.CreateShuffleVector(Accumulator, LowerSplitMask);
22138 UpperSplitAcc = B.CreateShuffleVector(Accumulator, UpperSplitMask);
22139 }
22140
22141 auto *LowerSplitInt = createComplexDeinterleavingIR(
22142 B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
22143 auto *UpperSplitInt = createComplexDeinterleavingIR(
22144 B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
22145
22146 ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());
22147 return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);
22148 }
22149
22150 auto *IntTy = Type::getInt32Ty(B.getContext());
22151
22152 ConstantInt *ConstRotation = nullptr;
22153 if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
22154 ConstRotation = ConstantInt::get(IntTy, (int)Rotation);
22155
22156 if (Accumulator)
22157 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmlaq, Ty,
22158 {ConstRotation, Accumulator, InputB, InputA});
22159 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmulq, Ty,
22160 {ConstRotation, InputB, InputA});
22161 }
22162
22163 if (OperationType == ComplexDeinterleavingOperation::CAdd) {
22164 // 1 means the value is not halved.
22165 auto *ConstHalving = ConstantInt::get(IntTy, 1);
22166
22168 ConstRotation = ConstantInt::get(IntTy, 0);
22170 ConstRotation = ConstantInt::get(IntTy, 1);
22171
22172 if (!ConstRotation)
22173 return nullptr; // Invalid rotation for arm_mve_vcaddq
22174
22175 return B.CreateIntrinsic(Intrinsic::arm_mve_vcaddq, Ty,
22176 {ConstHalving, ConstRotation, InputA, InputB});
22177 }
22178
22179 return nullptr;
22180}
unsigned const MachineRegisterInfo * MRI
static bool isAddSubSExt(SDValue N, SelectionDAG &DAG)
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
static bool isExtendedBUILD_VECTOR(SDValue N, SelectionDAG &DAG, bool isSigned)
static bool isZeroExtended(SDValue N, SelectionDAG &DAG)
static bool areExtractExts(Value *Ext1, Value *Ext2)
Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth of the vector elements.
static EVT getExtensionTo64Bits(const EVT &OrigVT)
static const MCPhysReg GPRArgRegs[]
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
#define MAKE_CASE(V)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
static bool isAddSubZExt(SDValue N, SelectionDAG &DAG)
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static bool isConstant(const MachineInstr &MI)
static const LLT S1
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG)
static bool isStore(int Opcode)
static bool isThumb(const MCSubtargetInfo &STI)
static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const TargetInstrInfo *TII)
MatchingStackOffset - Return true if the given stack call argument is already available in the same p...
static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG)
static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
@ HA_DOUBLE
@ HA_VECT128
@ HA_VECT64
@ HA_FLOAT
@ HA_UNKNOWN
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total value size to 64 bits.
static cl::opt< unsigned > ConstpoolPromotionMaxSize("arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), cl::init(64))
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isVTBLMask(ArrayRef< int > M, EVT VT)
static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
static cl::opt< bool > EnableConstpoolPromotion("arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), cl::init(false))
static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG)
static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
static SDValue PerformExtractEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static const APInt * isPowerOf2Constant(SDValue V)
static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) can replace combinations of ...
static SDValue PerformVMOVhrCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG)
static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static bool isValidMVECond(unsigned CC, bool IsFloat)
static SDValue PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC)
IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSTORECombine - Target-specific dag combine xforms for ISD::STORE.
static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, SelectionDAG &DAG)
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isGTorGE(ISD::CondCode CC)
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1) intrinsic,...
static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask)
static bool isReverseMask(ArrayRef< int > M, EVT VT)
static bool isVZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of "vector_shuffle v,...
static SDValue PerformSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG)
static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc)
static bool isVTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool CanInvertMVEVCMP(SDValue N)
static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG)
static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformShiftCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
PerformShiftCombine - Checks for immediate versions of vector shifts and lowers them.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2)
FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static EVT getVectorTyFromPredicateVector(EVT VT)
static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
static bool isSRL16(const SDValue &Op)
static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC)
static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr, SDValue Inc, const SelectionDAG &DAG)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static Register genTPEntry(MachineBasicBlock *TpEntry, MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpExit, Register OpSizeReg, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI)
Adds logic in loop entry MBB to calculate loop iteration count and adds t2WhileLoopSetup and t2WhileL...
static bool isLTorLE(ISD::CondCode CC)
static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl, SelectionDAG &DAG)
static SDValue PerformBITCASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG)
static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock *BB, const TargetRegisterInfo *TRI)
static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG)
static bool hasNormalLoadOperand(SDNode *N)
hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node are normal,...
static SDValue PerformInsertEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformInsertEltCombine - Target-specific dag combine xforms for ISD::INSERT_VECTOR_ELT.
static SDValue PerformVDUPLANECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVDUPLANECombine - Target-specific dag combine xforms for ARMISD::VDUPLANE.
static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static cl::opt< unsigned > ConstpoolPromotionMaxTotal("arm-promote-constant-max-total", cl::Hidden, cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128))
static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static RTLIB::Libcall getDivRemLibcall(const SDNode *N, MVT::SimpleValueType SVT)
static SDValue PerformABSCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG)
SkipLoadExtensionForVMULL - return a load of the original vector size that does not do any sign/zero ...
static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static const MCPhysReg GPRArgRegs[]
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1.
static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT, SelectionDAG &DAG)
static bool isVZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue PerformORCombineToSMULWBT(SDNode *OR, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isVTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of "vector_shuffle v,...
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue FindBFIToCombineWith(SDNode *N)
static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, SelectionDAG &DAG)
ShuffleOpCodes
@ OP_VEXT3
@ OP_VTRNR
@ OP_VDUP1
@ OP_VZIPR
@ OP_VUZPR
@ OP_VREV
@ OP_VZIPL
@ OP_VTRNL
@ OP_COPY
@ OP_VEXT1
@ OP_VDUP0
@ OP_VEXT2
@ OP_VUZPL
@ OP_VDUP3
@ OP_VDUP2
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps)
static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool isS16(const SDValue &Op, SelectionDAG &DAG)
static bool isSRA16(const SDValue &Op)
static SDValue AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue LowerInterruptReturn(SmallVectorImpl< SDValue > &RetOps, const SDLoc &DL, SelectionDAG &DAG)
static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl, SelectionDAG &DAG)
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue &RetVal1, SDValue &RetVal2)
static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool isSHL16(const SDValue &Op)
static bool isVEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseVEXT, unsigned &Imm)
static SDValue PerformMVEVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool isTruncMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2)
Return the load opcode for a given load size.
static bool isLegalT2AddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
static bool isLegalMVEShuffleOp(unsigned PFEntry)
static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N, SelectionDAG &DAG)
static bool isVUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG)
PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for ISD::VECTOR_SHUFFLE.
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG)
SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, ANY_EXTEND,...
static bool isVMOVNTruncMask(ArrayRef< int > M, EVT ToVT, bool rev)
static SDValue PerformVQMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformAddcSubcCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformVSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static TargetLowering::ArgListTy getDivRemArgList(const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget)
static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getZeroVector - Returns a vector of specified type with all zero elements.
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static ARMCC::CondCodes getVCMPCondCode(SDValue N)
static cl::opt< bool > ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true))
static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformORCombineToBFI(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC, bool &Invert, SDValue &OtherOp, SelectionDAG &DAG)
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVSetCCToVCTPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isZeroVector(SDValue N)
static SDValue PerformAddeSubeCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void ReplaceCMP_SWAP_64Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K)
static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG)
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned StSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment store operation with given size.
static bool isVMOVNMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, NEON load/store intrinsics,...
static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMOVRRDCombine - Target-specific DAG combine transforms for ARMISD::VMOVRRD.
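For orientation, ARMISD::VMOVRRD hands an f64 value to the integer side as two i32 register halves; a hedged, standalone bit-level model (the function name here is my own, not an LLVM API):

  #include <cstdint>
  #include <cstring>

  // Bit-level model of ARMISD::VMOVRRD: the f64 value becomes its low and high
  // 32-bit words (low word first on little-endian ARM).
  static void splitF64ToRegs(double D, uint32_t &Lo, uint32_t &Hi) {
    uint64_t Bits;
    std::memcpy(&Bits, &D, sizeof(Bits));
    Lo = static_cast<uint32_t>(Bits);
    Hi = static_cast<uint32_t>(Bits >> 32);
  }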
static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain)
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMULCombine Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the special multi...
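A minimal scalar sketch (illustration only) of the per-lane identity the combine relies on; the distributed form is what multiply-accumulate patterns such as vmla can then match:

  // Per-lane identity behind PerformVMULCombine: both forms compute the same
  // value, but the second exposes multiply-accumulate opportunities.
  static int mulOfSum(int A, int B, int C) { return (A + B) * C; }
  static int sumOfMuls(int A, int B, int C) { return A * C + B * C; }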
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformORCombine - Target-specific DAG combine transforms for ISD::OR.
static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG)
static SDValue PerformTruncatingStoreCombine(StoreSDNode *St, SelectionDAG &DAG)
static unsigned SelectPairHalf(unsigned Elements, ArrayRef< int > Mask, unsigned Index)
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment load operation with given size.
static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG)
static bool isValidBaseUpdate(SDNode *N, SDNode *User)
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl)
static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op)
static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment, bool isSEXTLoad, bool IsMasked, bool isLE, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
std::pair< unsigned, const TargetRegisterClass * > RCPair
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,...
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))
static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, EVT VectorVT, VMOVModImmType type)
isVMOVModifiedImm - Check if the specified splat value corresponds to a valid vector constant for a N...
static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG)
BC is a bitcast that is about to be turned into a VMOVDRR.
static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, const SDLoc &dl)
static unsigned isNEONTwoResultShuffleMask(ArrayRef< int > ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF)
Check if ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), and return the corresponding AR...
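As a hedged illustration of just the VTRN case (one of the three patterns the helper recognizes, and ignoring the undef lanes the real check tolerates), a standalone mask test; looksLikeVTRNMask and its parameters are my own names:

  #include <vector>

  // Result WhichResult of a VTRN of two NumElts-element vectors pairs lane I
  // with lane I+NumElts, offset by 0 (even result) or 1 (odd result).
  static bool looksLikeVTRNMask(const std::vector<int> &M, unsigned NumElts,
                                unsigned &WhichResult) {
    for (WhichResult = 0; WhichResult < 2; ++WhichResult) {
      bool Match = true;
      for (unsigned I = 0; I < NumElts && Match; I += 2)
        if (M[I] != int(I + WhichResult) ||
            M[I + 1] != int(I + NumElts + WhichResult))
          Match = false;
      if (Match)
        return true;
    }
    return false;
  }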
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B)
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, struct BaseUpdateUser &User, bool SimpleConstIncOnly, TargetLowering::DAGCombinerInfo &DCI)
static bool allUsersAreInFunction(const Value *V, const Function *F)
Return true if all users of V are within function F, looking through ConstantExprs.
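A hedged approximation of such a walk using the standard Value/User APIs (this is my sketch, not the exact implementation):

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/Instruction.h"

  // Walk V's users, looking through ConstantExprs, and require that every
  // instruction user lives in F.
  static bool allUsersInFunctionSketch(const llvm::Value *V,
                                       const llvm::Function *F) {
    llvm::SmallVector<const llvm::User *, 8> Worklist(V->user_begin(),
                                                      V->user_end());
    while (!Worklist.empty()) {
      const llvm::User *U = Worklist.pop_back_val();
      if (const auto *I = llvm::dyn_cast<llvm::Instruction>(U)) {
        if (I->getFunction() != F)
          return false;
      } else if (const auto *CE = llvm::dyn_cast<llvm::ConstantExpr>(U)) {
        Worklist.append(CE->user_begin(), CE->user_end());
      } else {
        return false; // conservatively reject any other kind of user
      }
    }
    return true;
  }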
static bool isSingletonVEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG)
PerformVMOVDRRCombine - Target-specific DAG combine transforms for ARMISD::VMOVDRR.
static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, SDValue &SatK)
static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
isLegalAddressImmediate - Return true if the integer value can be used as the offset of the target ad...
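For ARM (non-Thumb) mode the legal offset range depends on the access type; a standalone sketch of those ranges (Thumb1/Thumb2 use the dedicated helpers listed above; the enum and function names here are my own):

  #include <cstdint>

  // ARM-mode address offsets: i8/i32 accesses take a 12-bit immediate, i16 an
  // 8-bit immediate, and VFP f32/f64 an 8-bit immediate scaled by 4.
  enum class AccessKind { I8, I16, I32, F32, F64 };

  static bool isLegalARMModeOffset(int64_t V, AccessKind Kind) {
    if (V < 0)
      V = -V;
    switch (Kind) {
    case AccessKind::I8:
    case AccessKind::I32:
      return V <= 4095;                  // +/- imm12
    case AccessKind::I16:
      return V <= 255;                   // +/- imm8
    case AccessKind::F32:
    case AccessKind::F64:
      return V <= 1020 && (V & 3) == 0;  // +/- imm8, scaled by 4
    }
    return false;
  }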
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isLegalT1AddressImmediate(int64_t V, EVT VT)
static SDValue CombineANDShift(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformADDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDECombine - Target-specific DAG combine transform from ARMISD::ADDC, ARMISD::ADDE,...
static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformHWLoopCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool isVUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of "vector_shuffle v,...
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members)
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) can replace combinations of ...
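A scalar model (illustrative only) of the equivalence being exploited: converting an integer and dividing by a power of two is exactly a fixed-point-to-float conversion with that many fractional bits, which VCVT can do in one instruction:

  #include <cstdint>

  // Fixed-point to float with FBits fractional bits: the int-to-float convert
  // followed by a divide by 2^FBits is the shape this combine looks for.
  static float fixedToFloat(int32_t X, unsigned FBits) {
    return static_cast<float>(X) / static_cast<float>(1u << FBits);
  }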
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG)
static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm, bool &Negate)
static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget)
canChangeToInt - Given the fp compare operand, return true if it is suitable to morph to an integer c...
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2)
Return the store opcode for a given store size.
static bool IsVUZPShuffleNode(SDNode *N)
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node)
Attaches vregs to MEMCPY that it will use as scratch registers when it is expanded into LDM/STM.
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
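For illustration, a standalone bit-level version of the "+0.0 only" test (the real check inspects DAG nodes and also looks through constant-pool loads):

  #include <cstdint>
  #include <cstring>

  // +0.0 is the all-zero bit pattern; -0.0 has the sign bit set and must not match.
  static bool isPositiveZero(double D) {
    uint64_t Bits;
    std::memcpy(&Bits, &D, sizeof(Bits));
    return Bits == 0;
  }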
static SDValue findMUL_LOHI(SDValue V)
static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N, SelectionDAG &DAG)
static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG)
static void genTPLoopBody(MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI, Register OpSrcReg, Register OpDestReg, Register ElementCountReg, Register TotalIterationsReg, bool IsMemcpy)
Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and t2DoLoopEnd.
static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformBUILD_VECTORCombine - Target-specific DAG combine transforms for ISD::BUILD_VECTOR.
static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformMinMaxCombine - Target-specific DAG combining for creating truncating saturates.
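A scalar model of the pattern being recognized: clamping to the narrow type's range and then truncating is a signed saturating narrow (VQMOVN-style), shown here for i32 to i8 (illustration only):

  #include <algorithm>
  #include <cstdint>

  // min(max(x, -128), 127) truncated to i8 is exactly a signed saturate to 8 bits.
  static int8_t saturateToI8(int32_t X) {
    return static_cast<int8_t>(std::clamp<int32_t>(X, -128, 127));
  }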
This file defines a TargetTransformInfo::Concept conforming object specific to the ARM target machine.
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Module.h This file contains the declarations for the Module class.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1620
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1548
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
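A small usage sketch (splatByte is my own example name):

  #include "llvm/ADT/APInt.h"
  #include <cstdint>

  // Replicate an 8-bit pattern across 32 bits: 0xAB becomes 0xABABABAB.
  static llvm::APInt splatByte(uint8_t Byte) {
    return llvm::APInt::getSplat(32, llvm::APInt(8, Byte));
  }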
unsigned logBase2() const
Definition: APInt.h:1703
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1606
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
virtual const ARMBaseRegisterInfo & getRegisterInfo() const =0
const uint32_t * getSjLjDispatchPreservedMask(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
Register getFrameRegister(const MachineFunction &MF) const override
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
const uint32_t * getTLSCallPreservedMask(const MachineFunction &MF) const
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
SmallPtrSet< const GlobalVariable *, 2 > & getGlobalsPromotedToConstantPool()
void setArgumentStackToRestore(unsigned v)
void setPromotedConstpoolIncrease(int Sz)
void setArgRegsSaveSize(unsigned s)
void setReturnRegsCount(unsigned s)
void setVarArgsFrameIndex(int Index)
unsigned getArgRegsSaveSize() const
void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV)
Indicate to the backend that GV has had its storage changed to inside a constant pool.
void setArgumentStackSize(unsigned size)
unsigned getArgumentStackSize() const
bool isTargetMachO() const
Definition: ARMSubtarget.h:312
bool useMovt() const
bool isTargetAEABI() const
Definition: ARMSubtarget.h:321
bool hasARMOps() const
Definition: ARMSubtarget.h:265
bool supportsTailCall() const
Definition: ARMSubtarget.h:399
const Triple & getTargetTriple() const
Definition: ARMSubtarget.h:298
bool hasVFP4Base() const
Definition: ARMSubtarget.h:273
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:196
bool isThumb1Only() const
Definition: ARMSubtarget.h:364
bool useFPVFMx() const
Definition: ARMSubtarget.h:282
bool hasFPARMv8Base() const
Definition: ARMSubtarget.h:274
bool isThumb2() const
Definition: ARMSubtarget.h:365
bool isTargetWindows() const
Definition: ARMSubtarget.h:308
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
bool hasBaseDSP() const
Definition: ARMSubtarget.h:288
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:200
bool useSjLjEH() const
Definition: ARMSubtarget.h:287
bool isTargetDarwin() const
Definition: ARMSubtarget.h:300
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:208
bool hasVFP2Base() const
Definition: ARMSubtarget.h:271
bool isTargetAndroid() const
Definition: ARMSubtarget.h:350
bool isROPI() const
bool isTargetCOFF() const
Definition: ARMSubtarget.h:310
bool isTargetGNUAEABI() const
Definition: ARMSubtarget.h:326
bool hasVFP3Base() const
Definition: ARMSubtarget.h:272
bool isAPCS_ABI() const
bool useFPVFMx64() const
Definition: ARMSubtarget.h:286
bool isTargetWatchOS() const
Definition: ARMSubtarget.h:302
bool hasMinSize() const
Definition: ARMSubtarget.h:363
bool isTargetIOS() const
Definition: ARMSubtarget.h:301
bool useNEONForSinglePrecisionFP() const
Definition: ARMSubtarget.h:267
const InstrItineraryData * getInstrItineraryData() const override
getInstrItins - Return the instruction itineraries based on subtarget selection.
Definition: ARMSubtarget.h:433
bool isTargetWatchABI() const
Definition: ARMSubtarget.h:303
bool hasAnyDataBarrier() const
Definition: ARMSubtarget.h:276
bool isTargetDriverKit() const
Definition: ARMSubtarget.h:304
bool isAAPCS_ABI() const
bool isRWPI() const
bool isLittle() const
Definition: ARMSubtarget.h:407
bool allowsUnalignedMem() const
Definition: ARMSubtarget.h:401
bool isTargetMuslAEABI() const
Definition: ARMSubtarget.h:331
bool isTargetLinux() const
Definition: ARMSubtarget.h:305
bool useFPVFMx16() const
Definition: ARMSubtarget.h:285
bool isMClass() const
Definition: ARMSubtarget.h:366
unsigned getPrefLoopLogAlignment() const
Definition: ARMSubtarget.h:486
bool isTargetHardFloat() const
bool useMulOps() const
Definition: ARMSubtarget.h:280
bool isTargetELF() const
Definition: ARMSubtarget.h:311
Align getDualLoadStoreAlignment() const
Definition: ARMSubtarget.h:443
bool isReadOnly(const GlobalValue *GV) const
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Align getABIAlignmentForCallingConv(Type *ArgTy, const DataLayout &DL) const override
Return the correct alignment for the current calling convention.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
const ARMSubtarget * getSubtarget() const
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const
bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const
Returns true if the addressing mode representing by AM is legal for the Thumb1 target,...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
SDValue PerformMVETruncCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize=false) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
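The equivalence is easy to verify: xor x, -1 is ~x = -x - 1 in two's complement, so y - (x ^ -1) equals (x + 1) + y. A tiny standalone check (illustration only):

  #include <cstdint>

  // Both forms compute the same value modulo 2^32.
  static uint32_t subOfNot(uint32_t X, uint32_t Y) { return Y - (X ^ ~0u); }
  static uint32_t incOfAdd(uint32_t X, uint32_t Y) { return (X + 1) + Y; }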
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const
PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
bool ExpandInlineAsm(CallInst *CI) const override
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
Value * createComplexDeinterleavingIR(IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const override
Create the IR node for the given complex deinterleaving operation.
bool isComplexDeinterleavingSupported() const override
Does this target support complex deinterleaving.
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
SDValue PerformMVEExtCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to fold a pair of shifts into a mask.
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &OriginalDemandedBits, const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the value type to use for ISD::SETCC.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
isShuffleMaskLegal - Targets can use this to indicate that they only support some VECTOR_SHUFFLE oper...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
bool useLoadStackGuardNode() const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const override
getRegClassFor - Return the register class that should be used for the specified value type.
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vstN intrinsic.
ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI)
bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const override
Does this target support complex deinterleaving with the given operation and type.
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const
PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const override
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
Instruction * makeDMB(IRBuilderBase &Builder, ARM_MB::MemBOpt Domain) const
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy, Align Alignment, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
bool isVectorLoadExtDesirable(SDValue ExtVal) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override
Return true if the target can combine store(extractelement VectorTy, Idx).
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vldN intrinsic.
bool useSoftFloat() const override
bool alignLoopsWithOptSize() const override
Should loops be aligned even when the function is marked OptSize (but not MinSize).
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
Returns true if an argument of type Ty needs to be passed in a contiguous block of registers in calli...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPostIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mo...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
bool isFloatingPointOperation() const
Definition: Instructions.h:922
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:93
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
The address of a basic block.
Definition: Constants.h:889
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power or 2,...
CCState - This class holds information needed while lowering arguments and return values.
void getInRegsParamInfo(unsigned InRegsParamRecordIndex, unsigned &BeginReg, unsigned &EndReg) const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
void rewindByValRegsInfo()
unsigned getInRegsParamsProcessed() const
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd)
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
unsigned getInRegsParamsCount() const
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1735
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1819
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1871
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:705
const APFloat & getValueAPF() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
bool isBigEndian() const
Definition: DataLayout.h:239
Align getStackAlignment() const
Definition: DataLayout.h:271
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
Align getPreferredAlign(const GlobalVariable *GV) const
Returns the preferred alignment of the specified global.
StringRef getPrivateGlobalPrefix() const
Definition: DataLayout.h:332
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
bool empty() const
Definition: DenseMap.h:98
iterator begin()
Definition: DenseMap.h:75
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Diagnostic information for unsupported feature in backend.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:539
unsigned getNumElements() const
Definition: DerivedTypes.h:582
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:168
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:202
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:264
arg_iterator arg_begin()
Definition: Function.h:818
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
bool hasStructRetAttr() const
Determine if the function returns a structure through first or second pointer argument.
Definition: Function.h:666
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:215
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:677
const GlobalValue * getGlobal() const
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:278
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:631
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2137
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1881
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2516
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2122
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1437
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:486
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1416
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2021
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2494
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2117
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1497
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:569
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2153
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
bool isEmpty() const
Returns true if there are no itineraries.
bool hasAtomicStore() const LLVM_READONLY
Return true if this atomic instruction stores to memory.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:83
const BasicBlock * getParent() const
Definition: Instruction.h:152
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
static bool LowerToByteSwap(CallInst *CI)
Try to replace a call instruction with a call to a bswap intrinsic.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:184
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:286
Value * getPointerOperand()
Definition: Instructions.h:280
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:236
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:600
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:40
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isInteger() const
Return true if this is an integer or a vector integer type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:585
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
void moveAfter(MachineBasicBlock *NewBefore)
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void computeMaxCallFrameSize(MachineFunction &MF, std::vector< MachineBasicBlock::iterator > *FrameSDOps=nullptr)
Computes the maximum size of a call frame.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getFunctionContextIndex() const
Return the index for the function context object.
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & reset(Property P)
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
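Illustrative MachineInstrBuilder chain showing how the add* methods above append operands to an instruction under construction; the instruction description, registers, and constant-pool index are placeholders rather than ARM-specific code.

  #include "llvm/CodeGen/MachineInstrBuilder.h"

  static void emitConstantPoolLoad(llvm::MachineBasicBlock &MBB,
                                   llvm::MachineBasicBlock::iterator InsertPt,
                                   const llvm::MIMetadata &MIMD,
                                   const llvm::MCInstrDesc &Desc,
                                   llvm::Register Dst, unsigned CPIdx) {
    // BuildMI adds the destination register; each add* call appends one
    // further operand to the new instruction.
    llvm::BuildMI(MBB, InsertPt, MIMD, Desc, Dst)
        .addConstantPoolIndex(CPIdx)
        .addImm(0); // immediate offset operand (placeholder)
  }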
MachineBasicBlock iterator that automatically skips over MIs that are inside bundles (i....
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
@ EK_Inline
EK_Inline - Jump table entries are emitted inline at their point of use.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
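Small sketch of the MachineOperand accessors above: rewrite a matching register operand in place. The From/To registers are illustrative.

  #include "llvm/CodeGen/MachineOperand.h"

  static void replaceRegOperand(llvm::MachineOperand &MO, llvm::Register From,
                                llvm::Register To) {
    if (MO.isReg() && MO.getReg() == From)
      MO.setReg(To);
  }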
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This class is used to represent an MLOAD node.
This class is used to represent an MSTORE node.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isVolatile() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns the original alignment of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
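A hedged helper showing the MemSDNode queries above: a "simple", naturally aligned access is the usual precondition checked before a combine rewrites a load or store. This is a sketch, not code from this file.

  #include "llvm/CodeGen/SelectionDAGNodes.h"

  static bool isSimpleNaturallyAlignedAccess(const llvm::MemSDNode *Mem) {
    // isSimple() excludes volatile and atomic accesses; the alignment is
    // compared against the store size of the in-memory type.
    return Mem->isSimple() &&
           Mem->getAlign().value() >=
               Mem->getMemoryVT().getStoreSize().getFixedValue();
  }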
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:130
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< use_iterator > uses()
size_t use_size() const
Return the number of uses of this node.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any uses of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Return true if the node is an UNDEF value.
void setFlags(SDNodeFlags NewFlags)
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
unsigned getNumOperands() const
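Sketch of the SDValue/SDNode accessors above: peek through a single-use zero-extend, a pattern that recurs throughout DAG combines.

  #include "llvm/CodeGen/ISDOpcodes.h"
  #include "llvm/CodeGen/SelectionDAGNodes.h"

  static llvm::SDValue peekThroughOneUseZExt(llvm::SDValue V) {
    if (V.getOpcode() == llvm::ISD::ZERO_EXTEND && V.hasOneUse())
      return V.getOperand(0);
    return V;
  }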
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:474
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:732
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:659
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:739
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
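A minimal sketch of the SelectionDAG builder pattern the methods above support: materialize a constant and fold it into a new node. The choice of operation is illustrative, not taken from the lowering code in this file.

  #include "llvm/CodeGen/SelectionDAG.h"

  static llvm::SDValue emitAddOne(llvm::SDValue V, llvm::SelectionDAG &DAG,
                                  const llvm::SDLoc &DL) {
    llvm::EVT VT = V.getValueType();
    llvm::SDValue One = DAG.getConstant(1, DL, VT);
    // getNode() CSEs identical nodes, so repeated calls are cheap.
    return DAG.getNode(llvm::ISD::ADD, DL, VT, V, One);
  }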
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
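Illustrative use of the shuffle accessors above: check whether a shuffle mask only reads lanes of the first input vector (undef lanes, encoded as -1, are ignored).

  #include "llvm/CodeGen/SelectionDAGNodes.h"

  static bool usesOnlyFirstInput(const llvm::ShuffleVectorSDNode *SVN) {
    int NumElts = SVN->getValueType(0).getVectorNumElements();
    for (int EltIdx : SVN->getMask())
      if (EltIdx >= NumElts)
        return false;
    return true;
  }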
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
const unsigned char * bytes_end() const
Definition: StringRef.h:118
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
const unsigned char * bytes_begin() const
Definition: StringRef.h:115
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:373
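Hedged StringSwitch example in the style of inline-asm constraint parsing; the constraint letters here are placeholders rather than the ARM constraint set.

  #include "llvm/ADT/StringSwitch.h"

  static bool isVectorRegisterConstraint(llvm::StringRef Constraint) {
    return llvm::StringSwitch<bool>(Constraint)
        .Case("w", true)   // hypothetical FP/vector register-class letter
        .Case("x", true)   // hypothetical restricted vector-class letter
        .Default(false);
  }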
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC)
Override the default CondCode to be used to test the result of the comparison libcall against zero.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
ShiftLegalizationStrategy
Return the preferred strategy to legalize this SHIFT instruction, with ExpansionFactor being the recu...
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
const TargetMachine & getTargetMachine() const
void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC)
Set the CallingConv that should be used for the specified libcall.
void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked load does or does not work with the specified type and ind...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
virtual unsigned getMaxSupportedInterleaveFactor() const
Get the maximum supported factor for interleaved memory accesses.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked store does or does not work with the specified type and in...
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
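A sketch of the target-initialization pattern these setters serve: register classes are added first, derived register properties are computed, then per-node legalization actions are declared. MyTargetLowering and MyTarget::QPRRegClass are assumed placeholder names, not real LLVM entities.

  // Assumed: MyTargetLowering derives from llvm::TargetLowering.
  void MyTargetLowering::initVectorSupport(const llvm::TargetRegisterInfo *TRI) {
    addRegisterClass(llvm::MVT::v4i32, &MyTarget::QPRRegClass); // assumed class
    computeRegisterProperties(TRI);
    setOperationAction(llvm::ISD::MUL, llvm::MVT::v4i32, Legal);
    setOperationAction(llvm::ISD::SDIV, llvm::MVT::v4i32, Expand);
    setTruncStoreAction(llvm::MVT::v4i32, llvm::MVT::v4i16, Expand);
  }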
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
bool isPositionIndependent() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
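Hedged makeLibCall sketch: when an FP conversion has no native instruction, lowering forwards it to a runtime routine. The choice of FPROUND here is illustrative.

  #include "llvm/CodeGen/TargetLowering.h"

  static llvm::SDValue lowerViaLibcall(llvm::SDValue Op, llvm::SelectionDAG &DAG,
                                       const llvm::TargetLowering &TLI) {
    llvm::SDValue Src = Op.getOperand(0);
    llvm::RTLIB::Libcall LC =
        llvm::RTLIB::getFPROUND(Src.getValueType(), Op.getValueType());
    llvm::TargetLowering::MakeLibCallOptions CallOptions;
    llvm::SDLoc DL(Op);
    // makeLibCall returns {result, chain}; only the value is needed here.
    return TLI.makeLibCall(DAG, LC, Op.getValueType(), Src, CallOptions, DL)
        .first;
  }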
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
TargetOptions Options
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
ObjectFormatType getObjectFormat() const
Get the object format for this triple.
Definition: Triple.h:398
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:662
bool isOSVersionLT(unsigned Major, unsigned Minor=0, unsigned Micro=0) const
Helper function for doing comparisons against version numbers included in the target triple.
Definition: Triple.h:495
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:629
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:252
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
Type * getArrayElementType() const
Definition: Type.h:404
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
void dump() const
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:143
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:216
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
Type * getElementType() const
Definition: DerivedTypes.h:436
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:199
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:48
@ SECREL
Section Relative (Windows TLS).
@ SBREL
Static Base Relative.
@ GOTTPOFF
Global Offset Table, Thread Pointer Offset.
@ TPOFF
Thread Pointer Offset.
TOF
Target Operand Flag enum.
Definition: ARMBaseInfo.h:242
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
Definition: ARMBaseInfo.h:288
@ MO_SBREL
MO_SBREL - On a symbol operand, this represents a static base relative relocation.
Definition: ARMBaseInfo.h:270
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
Definition: ARMBaseInfo.h:275
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
Definition: ARMBaseInfo.h:266
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition: ARMBaseInfo.h:263
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into a shifter_operand immed...
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
unsigned getAM2Offset(unsigned AM2Opc)
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting an 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
unsigned createVMOVModImm(unsigned OpCmode, unsigned Val)
int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
int getFP32FP16Imm(const APInt &Imm)
If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding for it.
AddrOpc getAM2Op(unsigned AM2Opc)
bool isBitFieldInvertedMask(unsigned v)
const unsigned FPStatusBits
const unsigned FPReservedBits
const unsigned RoundingBitsPos
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
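Sketch of the encoding question the ARM_AM helpers above answer: can a 32-bit constant be used directly as an ARM or Thumb-2 shifter-operand immediate? Both helpers return -1 when no encoding exists. The wrapper name is illustrative.

  #include "MCTargetDesc/ARMAddressingModes.h" // within the ARM backend tree

  static bool hasCheapImmediateEncoding(unsigned Imm, bool IsThumb2) {
    return IsThumb2 ? llvm::ARM_AM::getT2SOImmVal(Imm) != -1
                    : llvm::ARM_AM::getSOImmVal(Imm) != -1;
  }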
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
@ ARM_APCS
ARM Procedure Calling Standard (obsolete, but still used on some targets).
Definition: CallingConv.h:107
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
Definition: CallingConv.h:82
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition: CallingConv.h:63
@ ARM_AAPCS
ARM Architecture Procedure Calling Standard calling convention (aka EABI).
Definition: CallingConv.h:111
@ CXX_FAST_TLS
Used for access functions.
Definition: CallingConv.h:72
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
Definition: CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
@ ARM_AAPCS_VFP
Same as ARM_AAPCS, but uses hard floating point ABI.
Definition: CallingConv.h:114
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:751
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:237
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1133
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1129
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:724
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:477
@ SET_FPENV
Sets the current floating-point environment.
Definition: ISDOpcodes.h:1005
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1377
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:147
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:498
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:251
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1276
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:560
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:715
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1162
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1278
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1248
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1279
@ RESET_FPENV
Set floating-point environment to default state.
Definition: ISDOpcodes.h:1009
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:240
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1038
@ SET_FPMODE
Sets the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1028
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:784
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:484
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ EH_SJLJ_SETUP_DISPATCH
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here.
Definition: ISDOpcodes.h:151
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:791
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:544
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1362
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:391
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:689
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1240
@ RESET_FPMODE
Sets default dynamic floating-point control modes.
Definition: ISDOpcodes.h:1032
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:256
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1376
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:478
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:914
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1274
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:904
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:230
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1275
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1407
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:886
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:775
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:663
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:621
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1054
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1359
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:723
@ WRITE_REGISTER
Definition: ISDOpcodes.h:119
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1228
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1363
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:995
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:759
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:931
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1084
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:328
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1277
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1063
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:350
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:728
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1244
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:212
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1378
@ RegisterMask
Definition: ISDOpcodes.h:75
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:223
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1158
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:209
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:324
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1371
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:881
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:652
@ GET_FPMODE
Reads the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1023
@ GET_FPENV
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1000
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:706
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:601
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1272
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:574
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:118
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:536
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:781
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1218
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:857
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:743
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1255
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1280
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:972
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:332
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1048
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:799
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:675
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:889
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:737
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:304
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1379
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1270
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:444
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:466
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:443
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:991
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1271
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:837
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1189
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:471
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:681
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1215
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:658
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1360
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:525
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1269
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:870
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
Definition: ISDOpcodes.h:106
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:856
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:141
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:787
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1153
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1077
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:764
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:494
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:341
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:516
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1563
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1479
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1530
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1510
static const int LAST_INDEXED_MODE
Definition: ISDOpcodes.h:1481
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
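Illustrative use of the ISD predicates above as a DAG-combine guard: only plain, unindexed, non-extending loads with a single user are usually safe to fold.

  #include "llvm/CodeGen/SelectionDAGNodes.h"

  static bool isFoldableLoad(llvm::SDValue V) {
    return llvm::ISD::isNormalLoad(V.getNode()) && V.hasOneUse();
  }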
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1471
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:593
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
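Hedged PatternMatch example of the kind used when recognizing widening shuffles at the IR level: match a zext/sext whose operand is a shufflevector of two arbitrary values.

  #include "llvm/IR/PatternMatch.h"

  static bool isExtendedShuffle(llvm::Value *V) {
    using namespace llvm::PatternMatch;
    llvm::Value *A, *B;
    return match(V, m_ZExtOrSExt(m_Shuffle(m_Value(A), m_Value(B))));
  }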
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
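A short sketch of how these RTLIB lookups pick a runtime routine (the types are chosen arbitrarily for illustration).
  llvm::RTLIB::Libcall LC = llvm::RTLIB::getFPTOSINT(MVT::f64, MVT::i64);
  if (LC == llvm::RTLIB::UNKNOWN_LIBCALL)
    llvm::report_fatal_error("unsupported fp-to-int conversion");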
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Define
Register definition.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
constexpr double e
Definition: MathExtras.h:31
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
@ Offset
Definition: DWP.cpp:456
@ Length
Definition: DWP.cpp:456
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:239
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
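Purely illustrative usage of the STLExtras range helpers indexed here (all_of, enumerate, and friends).
  llvm::SmallVector<int, 8> Mask = {0, 2, 4, 6};
  bool AllEven = llvm::all_of(Mask, [](int M) { return (M & 1) == 0; });
  for (const auto &E : llvm::enumerate(Mask))
    llvm::dbgs() << E.index() << " -> " << E.value() << "\n";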
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:251
bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:280
void shuffle(Iterator first, Iterator last, RNG &&g)
Definition: STLExtras.h:1541
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
bool CC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:263
bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
int countl_zero(T Val)
Count the number of 0s from the most significant bit to the least significant bit, stopping at the first 1.
Definition: bit.h:281
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
bool RetCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_Win32_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
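For orientation, a few concrete results for the bit-manipulation helpers above (from MathExtras.h and bit.h); the operands are arbitrary examples.
  bool P = llvm::isPowerOf2_32(64);           // true
  unsigned L = llvm::Log2_32(64);             // 6
  int TZ = llvm::countr_zero(0x28u);          // 3  (0b101000)
  int LZ = llvm::countl_zero(0x1u);           // 31 (32-bit operand)
  bool M = llvm::isMask_32(0x0F);             // true (0b1111)
  bool SM = llvm::isShiftedMask_32(0x78);     // true (0b1111000)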
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1312
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ MVEVMVNModImm
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
CombineLevel
Definition: DAGCombine.h:15
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
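A sketch, assuming an immediate Imm and an ARMSubtarget pointer are in scope, of the kind of query these two ARM helpers answer when choosing between materializing a constant directly or via its bitwise inverse.
  unsigned CostImm = ConstantMaterializationCost(Imm, Subtarget, /*ForCodesize=*/true);
  unsigned CostNot = ConstantMaterializationCost(~Imm, Subtarget, /*ForCodesize=*/true);
  // Prefer materializing ~Imm (e.g. via MVN) only when it is strictly cheaper.
  bool UseInverted =
      HasLowerConstantMaterializationCost(~Imm, Imm, Subtarget, /*ForCodesize=*/true);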
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:244
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
bool RetFastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition: Casting.h:565
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
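Concrete examples for the two alignment helpers (values chosen only for illustration).
  uint64_t Padded = llvm::alignTo(13, llvm::Align(8));            // 16
  llvm::Align A = llvm::commonAlignment(llvm::Align(16), 8);      // Align(8)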
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
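A sketch of the idiom that combines BuildMI, predOps, and condCodeOp elsewhere in the ARM backend; the opcode, registers, block, and iterator are placeholders.
  BuildMI(MBB, InsertPt, DL, TII->get(ARM::t2ADDri), DestReg)
      .addReg(SrcReg)
      .addImm(1)
      .add(predOps(ARMCC::AL))   // always-executed predicate operands
      .add(condCodeOp());        // no 'S' bit, i.e. no CPSR definition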
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
unsigned gettBLXrOpcode(const MachineFunction &MF)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
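For example, createSequentialMask(2, 4, 2) produces the mask <2, 3, 4, 5, undef, undef>, with undef encoded as -1.
  llvm::SmallVector<int, 16> Mask = llvm::createSequentialMask(2, 4, 2);
  // Mask == {2, 3, 4, 5, -1, -1}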
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
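A minimal sketch, assuming N is a binary SDNode under combine, of how these SDValue constant predicates gate folds.
  llvm::SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
  bool RHSIsZero = llvm::isNullConstant(RHS);        // x op 0
  bool RHSIsAllOnes = llvm::isAllOnesConstant(RHS);  // x op -1
  bool LHSIsNot = llvm::isBitwiseNot(LHS);           // (xor x, -1) op ...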
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Load/store instruction that can be merged with a base address update.
SDNode * N
Instruction that updates a pointer.
unsigned ConstInc
Pointer increment value if it is a constant, or 0 otherwise.
SDValue Inc
Pointer increment operand.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:448
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:455
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:628
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:203
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:438
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:198
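Illustrative only (Ctx is an LLVMContext): a few of the EVT constructors and queries used heavily by the lowering code.
  llvm::EVT VecVT = llvm::EVT::getVectorVT(Ctx, MVT::i16, 8);      // v8i16
  llvm::EVT HalfVT = VecVT.getHalfNumVectorElementsVT(Ctx);        // v4i16
  bool Is128 = VecVT.is128BitVector();                             // true
  uint64_t EltBits = VecVT.getScalarSizeInBits();                  // 16
  llvm::EVT AsFP = VecVT.changeVectorElementType(MVT::f16);        // v8f16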
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:297
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:63
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition: KnownBits.cpp:57
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:777
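A small sketch of KnownBits arithmetic with fully known inputs.
  llvm::KnownBits L = llvm::KnownBits::makeConstant(llvm::APInt(8, 0x0F));
  llvm::KnownBits R = llvm::KnownBits::makeConstant(llvm::APInt(8, 0x01));
  llvm::KnownBits Sum = llvm::KnownBits::computeForAddSub(
      /*Add=*/true, /*NSW=*/false, /*NUW=*/false, L, R);
  // Both inputs are fully known, so Sum is the constant 0x10.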
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
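A sketch (MF and FI are assumed to be a MachineFunction and a frame index) of building the MachinePointerInfo records named above.
  llvm::MachinePointerInfo CPInfo = llvm::MachinePointerInfo::getConstantPool(MF);
  llvm::MachinePointerInfo SlotInfo =
      llvm::MachinePointerInfo::getFixedStack(MF, FI).getWithOffset(4);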
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoSignedZeros() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setInRegister(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
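A hedged sketch of the builder-style use of CallLoweringInfo when lowering a libcall; DAG, dl, Chain, Callee, RetTy, Args, and the enclosing TargetLowering object are assumed to be in scope.
  llvm::TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(Chain)
      .setLibCallee(llvm::CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
      .setSExtResult(true);
  std::pair<llvm::SDValue, llvm::SDValue> CallResult = LowerCallTo(CLI);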
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)